author     Pirama Arumuga Nainar <pirama@google.com>    2015-05-06 11:46:36 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>    2015-05-18 10:52:30 -0700
commit     2c3e0051c31c3f5b2328b447eadf1cf9c4427442 (patch)
tree       c0104029af14e9f47c2ef58ca60e6137691f3c9b /lib
parent     e1bc145815f4334641be19f1c45ecf85d25b6e5a (diff)
Update aosp/master LLVM for rebase to r235153
Change-Id: I9bf53792f9fc30570e81a8d80d296c681d005ea7
(cherry picked from commit 0c7f116bb6950ef819323d855415b2f2b0aad987)
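The headline addition in this rebase is the new DivergenceAnalysis pass under lib/Analysis (the full new file appears further below in the diff). As a quick orientation — a minimal sketch, not part of this commit, and assuming the factory declaration lands in llvm/Analysis/Passes.h via the include/ changes that this lib-only view omits — a client would schedule it like any other legacy function pass:

    #include "llvm/Analysis/Passes.h"        // assumed to declare createDivergenceAnalysisPass()
    #include "llvm/IR/LegacyPassManager.h"
    using namespace llvm;

    // Schedule the new analysis; on targets without branch divergence the
    // pass exits early (see runOnFunction in the new file below), so adding
    // it unconditionally is safe.
    void addDivergenceAnalysis(legacy::FunctionPassManager &FPM) {
      FPM.add(createDivergenceAnalysisPass());
    }

Since the pass registers itself under the name "divergence", its print() output (the DIVERGENT: lines) should also be reachable with opt -divergence -analyze.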
Diffstat (limited to 'lib')
563 files changed, 17617 insertions, 8940 deletions
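The first file in the diff, AliasAnalysis.cpp, grows a getModRefInfo overload that compares an arbitrary instruction against a call site, dispatching to the call-vs-call query when the instruction is itself a call. A hypothetical caller, sketched for orientation only (not code from this commit):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Ask whether Call and I can touch the same memory, via the new
    // Instruction-vs-ImmutableCallSite overload added below. I is expected
    // to be a call/invoke or a memory-accessing instruction; in the non-call
    // case the overload checks Call against the location I defines.
    static bool mayInterfere(AliasAnalysis &AA, Instruction *I, CallInst *Call) {
      return AA.getModRefInfo(I, ImmutableCallSite(Call)) !=
             AliasAnalysis::NoModRef;
    }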
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 0b0fd50..43db176 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) { AA->addEscapingUse(U); } +AliasAnalysis::ModRefResult +AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { + // We may have two calls + if (auto CS = ImmutableCallSite(I)) { + // Check if the two calls modify the same memory + return getModRefInfo(Call, CS); + } else { + // Otherwise, check if the call modifies or references the + // location this memory access defines. The best we can say + // is that if the call references what this instruction + // defines, it must be clobbered by this location. + const AliasAnalysis::Location DefLoc = AA->getLocation(I); + if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) + return AliasAnalysis::ModRef; + } + return AliasAnalysis::NoModRef; +} AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(ImmutableCallSite CS, @@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) { // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. - if (!alias(getLocation(L), Loc)) + if (Loc.Ptr && !alias(getLocation(L), Loc)) return NoModRef; // Otherwise, a load just reads. @@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) { if (!S->isUnordered()) return ModRef; - // If the store address cannot alias the pointer in question, then the - // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) - return NoModRef; + if (Loc.Ptr) { + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. + if (!alias(getLocation(S), Loc)) + return NoModRef; - // If the pointer is a pointer to constant memory, then it could not have been - // modified by this store. - if (pointsToConstantMemory(Loc)) - return NoModRef; + // If the pointer is a pointer to constant memory, then it could not have + // been modified by this store. + if (pointsToConstantMemory(Loc)) + return NoModRef; + + } // Otherwise, a store just writes. return Mod; diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 5865259..a1bfba1 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -44,7 +44,7 @@ namespace { errs() << " " << Val << " " << Desc << " responses (" << Val*100/Sum << "%)\n"; } - ~AliasAnalysisCounter() { + ~AliasAnalysisCounter() override { unsigned AASum = No+May+Partial+Must; unsigned MRSum = NoMR+JustRef+JustMod+MR; if (AASum + MRSum) { // Print a report if any counted queries occurred... diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index fe4bd4c..273eacc 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) { if (EvalAAMD && isa<StoreInst>(&*I)) Stores.insert(&*I); Instruction &Inst = *I; - if (CallSite CS = cast<Value>(&Inst)) { + if (auto CS = CallSite(&Inst)) { Value *Callee = CS.getCalledValue(); // Skip actual functions for direct function calls. 
if (!isa<Function>(Callee) && isInterestingPointer(Callee)) diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 45442b0..4c79b9f 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -187,7 +187,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { return false; for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { - CallSite C1 = getUnknownInst(i), C2 = Inst; + CallSite C1(getUnknownInst(i)), C2(Inst); if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 4549c1e..842ff0a 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFLAliasAnalysisPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDelinearizationPass(Registry); + initializeDivergenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); initializeDomPrinterPass(Registry); diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk index 277956c..94a1d2e 100644 --- a/lib/Analysis/Android.mk +++ b/lib/Analysis/Android.mk @@ -22,6 +22,7 @@ analysis_SRC_FILES := \ CostModel.cpp \ Delinearization.cpp \ DependenceAnalysis.cpp \ + DivergenceAnalysis.cpp \ DomPrinter.cpp \ DominanceFrontier.cpp \ IVUsers.cpp \ diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index be2282f..2767e41 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -932,14 +932,14 @@ aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size, // Also, check that they all index through arrays. for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) { if (!isa<ArrayType>(GetElementPtrInst::getIndexedType( - GEP1->getPointerOperandType(), IntermediateIndices))) + GEP1->getSourceElementType(), IntermediateIndices))) return AliasAnalysis::MayAlias; IntermediateIndices.push_back(GEP1->getOperand(i + 1)); } StructType *LastIndexedStruct = dyn_cast<StructType>(GetElementPtrInst::getIndexedType( - GEP1->getPointerOperandType(), IntermediateIndices)); + GEP1->getSourceElementType(), IntermediateIndices)); if (!LastIndexedStruct) return AliasAnalysis::MayAlias; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 37f2fae..3d819eb 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits { std::string Result; raw_string_ostream OS(Result); - OS << Node->getName().str() << ":"; + OS << Node->getName() << ":"; switch (ViewBlockFreqPropagationDAG) { case GVDT_Fraction: Graph->printBlockFreq(OS, Node); diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 278073c..456cee1 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( return true; } -/// \brief Get the maximum allowed loop scale. -/// -/// Gives the maximum number of estimated iterations allowed for a loop. Very -/// large numbers cause problems downstream (even within 64-bits). -static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); } - /// \brief Compute the loop scale for a loop. 
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { // Compute loop scale. DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + // Infinite loops need special handling. If we give the back edge an infinite + // mass, they may saturate all the other scales in the function down to 1, + // making all the other region temperatures look exactly the same. Choose an + // arbitrary scale to avoid these issues. + // + // FIXME: An alternate way would be to select a symbolic scale which is later + // replaced to be the maximum of all computed scales plus 1. This would + // appropriately describe the loop as having a large scale, without skewing + // the final frequency computation. + const Scaled64 InifiniteLoopScale(1, 12); + // LoopScale == 1 / ExitMass // ExitMass == HeadMass - BackedgeMass BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; - // Block scale stores the inverse of the scale. - Loop.Scale = ExitMass.toScaled().inverse(); + // Block scale stores the inverse of the scale. If this is an infinite loop, + // its exit mass will be zero. In this case, use an arbitrary scale for the + // loop scale. + Loop.Scale = + ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse(); DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() << " - " << Loop.BackedgeMass << ")\n" << " - scale = " << Loop.Scale << "\n"); - - if (Loop.Scale > getMaxLoopScale()) { - Loop.Scale = getMaxLoopScale(); - DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); - } } /// \brief Package up a loop. @@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, const Scaled64 &Min, const Scaled64 &Max) { // Scale the Factor to a size that creates integers. Ideally, integers would // be scaled so that Max == UINT64_MAX so that they can be best - // differentiated. However, the register allocator currently deals poorly - // with large numbers. Instead, push Min up a little from 1 to give some - // room to differentiate small, unequal numbers. - // - // TODO: fix issues downstream so that ScalingFactor can be - // Scaled64(1,64)/Max. - Scaled64 ScalingFactor = Min.inverse(); - if ((Max / Min).lg() < 60) + // differentiated. However, in the presence of large frequency values, small + // frequencies are scaled down to 1, making it impossible to differentiate + // small, unequal numbers. When the spread between Min and Max frequencies + // fits well within MaxBits, we make the scale be at least 8. + const unsigned MaxBits = 64; + const unsigned SpreadBits = (Max / Min).lg(); + Scaled64 ScalingFactor; + if (SpreadBits <= MaxBits - 3) { + // If the values are small enough, make the scaling factor at least 8 to + // allow distinguishing small values. + ScalingFactor = Min.inverse(); ScalingFactor <<= 3; + } else { + // If the values need more than MaxBits to be represented, saturate small + // frequency values down to 1 by using a scaling factor that benefits large + // frequency values. + ScalingFactor = Scaled64(1, MaxBits) / Max; + } // Translate the floats to integers. 
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 14800f4..8799a71 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -379,6 +379,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!CV) return false; + // If the LHS is the result of AND'ing a value with a single bit bitmask, + // we don't have information about probabilities. + if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) + if (LHS->getOpcode() == Instruction::And) + if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) + if (AndRHS->getUniqueInteger().isPowerOf2()) + return false; + bool isProb; if (CV->isZero()) { switch (CI->getPredicate()) { @@ -499,25 +507,23 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. - for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), - E = po_end(&F.getEntryBlock()); - I != E; ++I) { - DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); - if (calcUnreachableHeuristics(*I)) + for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) continue; - if (calcMetadataWeights(*I)) + if (calcMetadataWeights(BB)) continue; - if (calcColdCallHeuristics(*I)) + if (calcColdCallHeuristics(BB)) continue; - if (calcLoopBranchHeuristics(*I)) + if (calcLoopBranchHeuristics(BB)) continue; - if (calcPointerHeuristics(*I)) + if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(*I)) + if (calcZeroHeuristics(BB)) continue; - if (calcFloatingPointHeuristics(*I)) + if (calcFloatingPointHeuristics(BB)) continue; - calcInvokeHeuristics(*I); + calcInvokeHeuristics(BB); } PostDominatedByUnreachable.clear(); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 89787f82..c86f1f5 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." + F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; @@ -111,7 +111,7 @@ namespace { } bool runOnFunction(Function &F) override { - std::string Filename = "cfg." + F.getName().str() + ".dot"; + std::string Filename = ("cfg." 
+ F.getName() + ".dot").str(); errs() << "Writing '" << Filename << "'..."; std::error_code EC; diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp index 53d748d..3147992 100644 --- a/lib/Analysis/CFLAliasAnalysis.cpp +++ b/lib/Analysis/CFLAliasAnalysis.cpp @@ -161,7 +161,7 @@ struct FunctionHandle : public CallbackVH { assert(CFLAA != nullptr); } - virtual ~FunctionHandle() {} + ~FunctionHandle() override {} void deleted() override { removeSelfFromCache(); } void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } @@ -189,7 +189,7 @@ public: initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); } - virtual ~CFLAliasAnalysis() {} + ~CFLAliasAnalysis() override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AliasAnalysis::getAnalysisUsage(AU); diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index ae40321..1335a6d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_library(LLVMAnalysis ConstantFolding.cpp Delinearization.cpp DependenceAnalysis.cpp + DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 995465d..a85e813 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -671,8 +671,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. -static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, - const DataLayout &DL, +static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops, + Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *IntPtrTy = DL.getIntPtrType(ResultTy); @@ -681,8 +681,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa<StructType>(GetElementPtrInst::getIndexedType( - Ops[0]->getType(), - Ops.slice(1, i - 1)))) && + cast<PointerType>(Ops[0]->getType()->getScalarType()) + ->getElementType(), + Ops.slice(1, i - 1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -697,7 +698,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy, if (!Any) return nullptr; - Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI)) C = Folded; @@ -723,7 +724,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { } /// If we can symbolically evaluate the GEP constant expression, do so. -static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, +static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops, Type *ResultTy, const DataLayout &DL, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; @@ -865,7 +866,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, return nullptr; // Create a GEP. 
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs); assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); @@ -1085,13 +1086,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); - case Instruction::GetElementPtr: - if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI)) + case Instruction::GetElementPtr: { + Type *SrcTy = nullptr; + if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI)) return C; - if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI)) + if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI)) return C; - return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1)); + } } } diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp new file mode 100644 index 0000000..e5ee295 --- /dev/null +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -0,0 +1,337 @@ +//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines divergence analysis which determines whether a branch in a +// GPU program is divergent. It can help branch optimizations such as jump +// threading and loop unswitching to make better decisions. +// +// GPU programs typically use the SIMD execution model, where multiple threads +// in the same execution group have to execute in lock-step. Therefore, if the +// code contains divergent branches (i.e., threads in a group do not agree on +// which path of the branch to take), the group of threads has to execute all +// the paths from that branch with different subsets of threads enabled until +// they converge at the immediately post-dominating BB of the paths. +// +// Due to this execution model, some optimizations such as jump +// threading and loop unswitching can be unfortunately harmful when performed on +// divergent branches. Therefore, an analysis that computes which branches in a +// GPU program are divergent can help the compiler to selectively run these +// optimizations. +// +// This file defines divergence analysis which computes a conservative but +// non-trivial approximation of all divergent branches in a GPU program. It +// partially implements the approach described in +// +// Divergence Analysis +// Sampaio, Souza, Collange, Pereira +// TOPLAS '13 +// +// The divergence analysis identifies the sources of divergence (e.g., special +// variables that hold the thread ID), and recursively marks variables that are +// data or sync dependent on a source of divergence as divergent. +// +// While data dependency is a well-known concept, the notion of sync dependency +// is worth more explanation. Sync dependence characterizes the control flow +// aspect of the propagation of branch divergence. For example, +// +// %cond = icmp slt i32 %tid, 10 +// br i1 %cond, label %then, label %else +// then: +// br label %merge +// else: +// br label %merge +// merge: +// %a = phi i32 [ 0, %then ], [ 1, %else ] +// +// Suppose %tid holds the thread ID. 
Although %a is not data dependent on %tid +// because %tid is not on its use-def chains, %a is sync dependent on %tid +// because the branch "br i1 %cond" depends on %tid and affects which value %a +// is assigned to. +// +// The current implementation has the following limitations: +// 1. intra-procedural. It conservatively considers the arguments of a +// non-kernel-entry function and the return value of a function call as +// divergent. +// 2. memory as black box. It conservatively considers values loaded from +// generic or local address as divergent. This can be improved by leveraging +// pointer analysis. +//===----------------------------------------------------------------------===// + +#include <vector> +#include "llvm/IR/Dominators.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +#define DEBUG_TYPE "divergence" + +namespace { +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.setPreservesAll(); + } + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet<const Value *> DivergentValues; +}; +} // End of anonymous namespace + +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + +namespace { + +class DivergencePropagator { +public: + DivergencePropagator(Function &F, TargetTransformInfo &TTI, + DominatorTree &DT, PostDominatorTree &PDT, + DenseSet<const Value *> &DV) + : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {} + void populateWithSourcesOfDivergence(); + void propagate(); + +private: + // A helper function that explores data dependents of V. + void exploreDataDependency(Value *V); + // A helper function that explores sync dependents of TI. + void exploreSyncDependency(TerminatorInst *TI); + // Computes the influence region from Start to End. This region includes all + // basic blocks on any path from Start to End. + void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion); + // Finds all users of I that are outside the influence region, and add these + // users to Worklist. 
+ void findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion); + + Function &F; + TargetTransformInfo &TTI; + DominatorTree &DT; + PostDominatorTree &PDT; + std::vector<Value *> Worklist; // Stack for DFS. + DenseSet<const Value *> &DV; // Stores all divergent values. +}; + +void DivergencePropagator::populateWithSourcesOfDivergence() { + Worklist.clear(); + DV.clear(); + for (auto &I : inst_range(F)) { + if (TTI.isSourceOfDivergence(&I)) { + Worklist.push_back(&I); + DV.insert(&I); + } + } + for (auto &Arg : F.args()) { + if (TTI.isSourceOfDivergence(&Arg)) { + Worklist.push_back(&Arg); + DV.insert(&Arg); + } + } +} + +void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { + // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's + // immediate post dominator are divergent. This rule handles if-then-else + // patterns. For example, + // + // if (tid < 5) + // a1 = 1; + // else + // a2 = 2; + // a = phi(a1, a2); // sync dependent on (tid < 5) + BasicBlock *ThisBB = TI->getParent(); + BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock(); + if (IPostDom == nullptr) + return; + + for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { + // A PHINode is uniform if it returns the same value no matter which path is + // taken. + if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second) + Worklist.push_back(I); + } + + // Propagation rule 2: if a value defined in a loop is used outside, the user + // is sync dependent on the condition of the loop exits that dominate the + // user. For example, + // + // int i = 0; + // do { + // i++; + // if (foo(i)) ... // uniform + // } while (i < tid); + // if (bar(i)) ... // divergent + // + // A program may contain unstructured loops. Therefore, we cannot leverage + // LoopInfo, which only recognizes natural loops. + // + // The algorithm used here handles both natural and unstructured loops. Given + // a branch TI, we first compute its influence region, the union of all simple + // paths from TI to its immediate post dominator (IPostDom). Then, we search + // for all the values defined in the influence region but used outside. All + // these users are sync dependent on TI. + DenseSet<BasicBlock *> InfluenceRegion; + computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion); + // An insight that can speed up the search process is that all the in-region + // values that are used outside must dominate TI. Therefore, instead of + // searching every basic blocks in the influence region, we search all the + // dominators of TI until it is outside the influence region. 
+ BasicBlock *InfluencedBB = ThisBB; + while (InfluenceRegion.count(InfluencedBB)) { + for (auto &I : *InfluencedBB) + findUsersOutsideInfluenceRegion(I, InfluenceRegion); + DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom(); + if (IDomNode == nullptr) + break; + InfluencedBB = IDomNode->getBlock(); + } +} + +void DivergencePropagator::findUsersOutsideInfluenceRegion( + Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) { + for (User *U : I.users()) { + Instruction *UserInst = cast<Instruction>(U); + if (!InfluenceRegion.count(UserInst->getParent())) { + if (DV.insert(UserInst).second) + Worklist.push_back(UserInst); + } + } +} + +void DivergencePropagator::computeInfluenceRegion( + BasicBlock *Start, BasicBlock *End, + DenseSet<BasicBlock *> &InfluenceRegion) { + assert(PDT.properlyDominates(End, Start) && + "End does not properly dominate Start"); + std::vector<BasicBlock *> InfluenceStack; + InfluenceStack.push_back(Start); + InfluenceRegion.insert(Start); + while (!InfluenceStack.empty()) { + BasicBlock *BB = InfluenceStack.back(); + InfluenceStack.pop_back(); + for (BasicBlock *Succ : successors(BB)) { + if (End != Succ && InfluenceRegion.insert(Succ).second) + InfluenceStack.push_back(Succ); + } + } +} + +void DivergencePropagator::exploreDataDependency(Value *V) { + // Follow def-use chains of V. + for (User *U : V->users()) { + Instruction *UserInst = cast<Instruction>(U); + if (DV.insert(UserInst).second) + Worklist.push_back(UserInst); + } +} + +void DivergencePropagator::propagate() { + // Traverse the dependency graph using DFS. + while (!Worklist.empty()) { + Value *V = Worklist.back(); + Worklist.pop_back(); + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) { + // Terminators with less than two successors won't introduce sync + // dependency. Ignore them. + if (TI->getNumSuccessors() > 1) + exploreSyncDependency(TI); + } + exploreDataDependency(V); + } +} + +} /// end namespace anonymous + +FunctionPass *llvm::createDivergenceAnalysisPass() { + return new DivergenceAnalysis(); +} + +bool DivergenceAnalysis::runOnFunction(Function &F) { + auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); + if (TTIWP == nullptr) + return false; + + TargetTransformInfo &TTI = TTIWP->getTTI(F); + // Fast path: if the target does not have branch divergence, we do not mark + // any branch as divergent. + if (!TTI.hasBranchDivergence()) + return false; + + DivergentValues.clear(); + DivergencePropagator DP(F, TTI, + getAnalysis<DominatorTreeWrapperPass>().getDomTree(), + getAnalysis<PostDominatorTree>(), DivergentValues); + DP.populateWithSourcesOfDivergence(); + DP.propagate(); + return false; +} + +void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { + if (DivergentValues.empty()) + return; + const Value *FirstDivergentValue = *DivergentValues.begin(); + const Function *F; + if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) { + F = Arg->getParent(); + } else if (const Instruction *I = + dyn_cast<Instruction>(FirstDivergentValue)) { + F = I->getParent()->getParent(); + } else { + llvm_unreachable("Only arguments and instructions can be divergent"); + } + + // Dumps all divergent values in F, arguments and then instructions. + for (auto &Arg : F->args()) { + if (DivergentValues.count(&Arg)) + OS << "DIVERGENT: " << Arg << "\n"; + } + // Iterate instructions using inst_range to ensure a deterministic order. 
+ for (auto &I : inst_range(F)) { + if (DivergentValues.count(&I)) + OS << "DIVERGENT:" << I << "\n"; + } +} diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 9d607cc..65ba1c7 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -212,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // list of the same call. CallSites.count(I->first) || - // If the call edge is not from a call or invoke, then the function - // pass RAUW'd a call with another value. This can happen when - // constant folding happens of well known functions etc. - !CallSite(I->first)) { + // If the call edge is not from a call or invoke, or it is a + // instrinsic call, then the function pass RAUW'd a call with + // another value. This can happen when constant folding happens + // of well known functions etc. + !CallSite(I->first) || + (CallSite(I->first).getCalledFunction() && + CallSite(I->first).getCalledFunction()->isIntrinsic())) { assert(!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!"); diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 2208f32..018ae99 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else if (Operator::getOpcode(I) == Instruction::BitCast) { if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) return true; - } else if (CallSite CS = I) { + } else if (auto CS = CallSite(I)) { // Make sure that this is just the function being called, not that it is // passing into the function. if (!CS.isCallee(&U)) { diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index eeb3b87..cacf70d 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -64,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { bool ContainsNoDuplicateCall; bool HasReturn; bool HasIndirectBr; + bool HasFrameEscape; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; @@ -148,12 +149,12 @@ public: IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -743,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. return false; + case Intrinsic::frameescape: + HasFrameEscape = true; + return false; } } @@ -941,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // If the visit this instruction detected an uninlinable pattern, abort. 
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr) + HasIndirectBr || HasFrameEscape) return false; // If the caller is a recursive function then we don't want to inline @@ -1171,7 +1175,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // returns false, and we can bail on out. if (!analyzeBlock(BB, EphValues)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr) + HasIndirectBr || HasFrameEscape) return false; // If the caller is a recursive function then we don't want to inline @@ -1286,16 +1290,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { /// \brief Test that two functions either have or have not the given attribute /// at the same time. -static bool attributeMatches(Function *F1, Function *F2, - Attribute::AttrKind Attr) { - return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +template<typename AttrKind> +static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { + return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr); } /// \brief Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee) { - return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + return attributeMatches(Caller, Callee, "target-cpu") && + attributeMatches(Caller, Callee, "target-features") && + attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && attributeMatches(Caller, Callee, Attribute::SanitizeThread); } @@ -1370,6 +1376,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (!ReturnsTwice && CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; + + // Disallow inlining functions that call @llvm.frameescape. Doing this + // correctly would require major changes to the inliner. + if (CS.getCalledFunction() && + CS.getCalledFunction()->getIntrinsicID() == + llvm::Intrinsic::frameescape) + return false; } } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 99c477d..d45f7bd 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2978,10 +2978,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); - Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS); + Constant *NewLHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesLHS); SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end()); - Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS); + Constant *NewRHS = ConstantExpr::getGetElementPtr( + GLHS->getSourceElementType(), Null, IndicesRHS); return ConstantExpr::getICmp(Pred, NewLHS, NewRHS); } } @@ -3241,17 +3243,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { +static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, + const Query &Q, unsigned) { // The type of the GEP pointer operand. 
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType()); - unsigned AS = PtrTy->getAddressSpace(); + unsigned AS = + cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace(); // getelementptr P -> P. if (Ops.size() == 1) return Ops[0]; // Compute the (pointer) type returned by the GEP instruction. - Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1)); Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType())) GEPTy = VectorType::get(GEPTy, VT->getNumElements()); @@ -3264,7 +3267,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (match(Ops[1], m_Zero())) return Ops[0]; - Type *Ty = PtrTy->getElementType(); + Type *Ty = SrcTy; if (Ty->isSized()) { Value *P; uint64_t C; @@ -3318,14 +3321,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { if (!isa<Constant>(Ops[i])) return nullptr; - return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); + return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]), + Ops.slice(1)); } Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + return ::SimplifyGEPInst( + cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(), + Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 1818e93..724c21f 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -177,6 +177,17 @@ void LoopAccessInfo::RuntimePointerCheck::print( } } +bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( + const SmallVectorImpl<int> *PtrPartition) const { + unsigned NumPointers = Pointers.size(); + + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J, PtrPartition)) + return true; + return false; +} + namespace { /// \brief Analyses memory accesses in a loop. /// @@ -1033,16 +1044,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) { StoreInst *ST = cast<StoreInst>(*I); Value* Ptr = ST->getPointerOperand(); - - if (isUniform(Ptr)) { - emitAnalysis( - LoopAccessReport(ST) - << "write to a loop invariant address could not be vectorized"); - DEBUG(dbgs() << "LAA: We don't allow storing to uniform addresses\n"); - CanVecMem = false; - return; - } - + // Check for store to loop invariant address. + StoreToLoopInvariantAddress |= isUniform(Ptr); // If we did *not* see this pointer before, insert it to the read-write // list. At this phase it is only a 'write' list. 
if (Seen.insert(Ptr).second) { @@ -1211,9 +1214,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - Instruction *tnullptr = nullptr; if (!PtrRtCheck.Need) - return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr); + return std::make_pair(nullptr, nullptr); unsigned NumPointers = PtrRtCheck.Pointers.size(); SmallVector<TrackingVH<Value> , 2> Starts; @@ -1284,6 +1286,9 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( } } + if (!MemoryRuntimeCheck) + return std::make_pair(nullptr, nullptr); + // We have to do this trickery because the IRBuilder might fold the check to a // constant expression in which case there is no Instruction anchored in a // the block. @@ -1301,19 +1306,24 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const ValueToValueMap &Strides) : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false) { + MaxSafeDepDistBytes(-1U), CanVecMem(false), + StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.empty()) - OS.indent(Depth) << "Memory dependences are safe\n"; - else + if (PtrRtCheck.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; + else + OS.indent(Depth) << "Memory dependences are safe\n"; } + OS.indent(Depth) << "Store to invariant address was " + << (StoreToLoopInvariantAddress ? "" : "not ") + << "found in loop.\n"; + if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index e1b7b4b..da3b829 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), static_cast<BasicBlock *>(nullptr))); - } else if (CallSite CS = cast<Value>(Inst)) { + } else if (auto CS = CallSite(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 716e3e6..84769cb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -223,7 +223,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, continue; } - if (CallSite InstCS = cast<Value>(Inst)) { + if (auto InstCS = CallSite(Inst)) { // Debug intrinsics don't cause dependences. if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. 
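A pattern worth noting: the MemDepPrinter and MemoryDependenceAnalysis hunks just above, like the AliasAnalysisEvaluator, AliasSetTracker, and GlobalsModRef hunks earlier, all replace implicit conversions such as "CallSite CS = cast<Value>(Inst)" with explicit construction followed by a validity test. Distilled into a standalone sketch (illustrative only, mirroring those hunks):

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static Function *getDirectCallee(Instruction *Inst) {
      // CallSite(Inst) is null -- and tests false -- when Inst is neither a
      // call nor an invoke; the old code spelled this as an implicit
      // conversion from a Value.
      if (auto CS = CallSite(Inst))
        return CS.getCalledFunction(); // still null for indirect calls
      return nullptr;
    }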
@@ -874,23 +874,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) { - - auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) { - if (auto *I = dyn_cast<LoadInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<StoreInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<VAArgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst)) - return AA->getLocation(I); - else if (auto *I = dyn_cast<AtomicRMWInst>(Inst)) - return AA->getLocation(I); - else - llvm_unreachable("unsupported memory instruction"); - }; - - const AliasAnalysis::Location Loc = getLocation(AA, QueryInst); + const AliasAnalysis::Location Loc = AA->getLocation(QueryInst); bool isLoad = isa<LoadInst>(QueryInst); BasicBlock *FromBB = QueryInst->getParent(); assert(FromBB); diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index cbc4700..f2a11cb 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -72,55 +72,53 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { // Printing the nodes directly isn't particularly helpful (since they // reference other nodes that won't be printed, particularly for the // filenames), so just print a few useful things. - for (DICompileUnit CU : Finder.compile_units()) { + for (MDCompileUnit *CU : Finder.compile_units()) { O << "Compile unit: "; - if (const char *Lang = LanguageString(CU.getLanguage())) + if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage())) O << Lang; else - O << "unknown-language(" << CU.getLanguage() << ")"; - printFile(O, CU.getFilename(), CU.getDirectory()); + O << "unknown-language(" << CU->getSourceLanguage() << ")"; + printFile(O, CU->getFilename(), CU->getDirectory()); O << '\n'; } - for (DISubprogram S : Finder.subprograms()) { - O << "Subprogram: " << S.getName(); - printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber()); - if (!S.getLinkageName().empty()) - O << " ('" << S.getLinkageName() << "')"; + for (MDSubprogram *S : Finder.subprograms()) { + O << "Subprogram: " << S->getName(); + printFile(O, S->getFilename(), S->getDirectory(), S->getLine()); + if (!S->getLinkageName().empty()) + O << " ('" << S->getLinkageName() << "')"; O << '\n'; } for (DIGlobalVariable GV : Finder.global_variables()) { - O << "Global variable: " << GV.getName(); - printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber()); - if (!GV.getLinkageName().empty()) - O << " ('" << GV.getLinkageName() << "')"; + O << "Global variable: " << GV->getName(); + printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine()); + if (!GV->getLinkageName().empty()) + O << " ('" << GV->getLinkageName() << "')"; O << '\n'; } - for (DIType T : Finder.types()) { + for (const MDType *T : Finder.types()) { O << "Type:"; - if (!T.getName().empty()) - O << ' ' << T.getName(); - printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber()); - if (T.isBasicType()) { - DIBasicType BT(T.get()); + if (!T->getName().empty()) + O << ' ' << T->getName(); + printFile(O, T->getFilename(), T->getDirectory(), T->getLine()); + if (auto *BT = dyn_cast<MDBasicType>(T)) { O << " "; if (const char *Encoding = - dwarf::AttributeEncodingString(BT.getEncoding())) + dwarf::AttributeEncodingString(BT->getEncoding())) O << Encoding; else - O << 
"unknown-encoding(" << BT.getEncoding() << ')'; + O << "unknown-encoding(" << BT->getEncoding() << ')'; } else { O << ' '; - if (const char *Tag = dwarf::TagString(T.getTag())) + if (const char *Tag = dwarf::TagString(T->getTag())) O << Tag; else - O << "unknown-tag(" << T.getTag() << ")"; + O << "unknown-tag(" << T->getTag() << ")"; } - if (T.isCompositeType()) { - DICompositeType CT(T.get()); - if (auto *S = CT.getIdentifier()) + if (auto *CT = dyn_cast<MDCompositeType>(T)) { + if (auto *S = CT->getRawIdentifier()) O << " (identifier: '" << S->getString() << "')"; } O << '\n'; diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index cd1e944..5e1cdd4 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -199,7 +199,7 @@ public: bool runOnRegion(Region *R, RGPassManager &RGM) override { Out << Banner; - for (const auto &BB : R->blocks()) { + for (const auto *BB : R->blocks()) { if (BB) BB->print(Out); else diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index ad83113..d7f5109 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo()); - for (const auto &BB : R.blocks()) + for (auto *BB : R.blocks()) if (RI.getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" << static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB)) diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 4e713fb..37377f0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -5690,7 +5690,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); - C = ConstantExpr::getGetElementPtr(C, C2); + C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2); } else C = ConstantExpr::getAdd(C, C2); } @@ -6698,6 +6698,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, return true; } + struct ClearWalkingBEDominatingCondsOnExit { + ScalarEvolution &SE; + + explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) + : SE(SE){}; + + ~ClearWalkingBEDominatingCondsOnExit() { + SE.WalkingBEDominatingConds = false; + } + }; + + // We don't want more than one activation of the following loop on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + WalkingBEDominatingConds = true; + ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); + + // If the loop is not reachable from the entry block, we risk running into an + // infinite loop as we walk up into the dom tree. These loops do not matter + // anyway, so we just return a conservative answer when we see them. 
+ if (!DT->isReachableFromEntry(L->getHeader())) + return false; + + for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; + DTN != HeaderDTN; + DTN = DTN->getIDom()) { + + assert(DTN && "should reach the loop header before reaching the root!"); + + BasicBlock *BB = DTN->getBlock(); + BasicBlock *PBB = BB->getSinglePredecessor(); + if (!PBB) + continue; + + BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator()); + if (!ContinuePredicate || !ContinuePredicate->isConditional()) + continue; + + Value *Condition = ContinuePredicate->getCondition(); + + // If we have an edge `E` within the loop body that dominates the only + // latch, the condition guarding `E` also guards the backedge. This + // reasoning works only for loops with a single latch. + + BasicBlockEdge DominatingEdge(PBB, BB); + if (DominatingEdge.isSingleEdge()) { + // We're constructively (and conservatively) enumerating edges within the + // loop body that dominate the latch. The dominator tree better agree + // with us on this: + assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + + if (isImpliedCond(Pred, LHS, RHS, Condition, + BB != ContinuePredicate->getSuccessor(0))) + return true; + } + } + return false; } @@ -7968,8 +8027,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), - BlockDispositions(64), FirstUnknown(nullptr) { + : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64), + LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } @@ -8000,6 +8059,7 @@ void ScalarEvolution::releaseMemory() { } assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index a73ec9e..0bd427b 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -488,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) if (Constant *CRHS = dyn_cast<Constant>(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, CRHS); + return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), + CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -523,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Emit a GEP. 
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); rememberInstruction(GEP); return GEP; @@ -1803,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, return NumElim; } +bool SCEVExpander::isHighCostExpansionHelper( + const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { + if (!Processed.insert(S).second) + return false; + + if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) { + // If the divisor is a power of two and the SCEV type fits in a native + // integer, consider the divison cheap irrespective of whether it occurs in + // the user code since it can be lowered into a right shift. + if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) + if (SC->getValue()->getValue().isPowerOf2()) { + const DataLayout &DL = + L->getHeader()->getParent()->getParent()->getDataLayout(); + unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); + return DL.isIllegalInteger(Width); + } + + // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or + // HowManyLessThans produced to compute a precise expression, rather than a + // UDiv from the user's code. If we can't find a UDiv in the code with some + // simple searching, assume the former consider UDivExpr expensive to + // compute. + BasicBlock *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + return true; + + BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); + if (!ExitingBI || !ExitingBI->isConditional()) + return true; + + ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition()); + if (!OrigCond) + return true; + + const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1)); + RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1)); + if (RHS != S) { + const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0)); + LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1)); + if (LHS != S) + return true; + } + } + + // Recurse past add expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive rematerialize. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + if (isHighCostExpansionHelper(*I, L, Processed)) + return true; + } + return false; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) + return true; + + // If we haven't recognized an expensive SCEV pattern, assume it's an + // expression produced by program code. + return false; +} + namespace { // Search for a SCEV subexpression that is not safe to expand. Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index 7e574d5..8b378a3 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -409,9 +409,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) { // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. 
- if (funcName.front() == '\01') - funcName = funcName.substr(1); - return funcName; + return GlobalValue::getRealLinkageName(funcName); } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index f51c7f54..a1519de 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -76,6 +76,10 @@ bool TargetTransformInfo::hasBranchDivergence() const { return TTIImpl->hasBranchDivergence(); } +bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { + return TTIImpl->isSourceOfDivergence(V); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return TTIImpl->isLoweredToCall(F); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f329e3a..3651301 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -694,10 +694,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, // We're running this loop for once for each value queried resulting in a // runtime of ~O(#assumes * #values). - assert(isa<IntrinsicInst>(I) && - dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume && + assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume && "must be an assume intrinsic"); - + Value *Arg = I->getArgOperand(0); if (Arg == V && isValidAssumeForContext(I, Q)) { @@ -2935,7 +2934,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(V)) return LI->getMetadata(LLVMContext::MD_nonnull); - if (ImmutableCallSite CS = V) + if (auto CS = ImmutableCallSite(V)) if (CS.isReturnNonNull()) return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 3bf090a..a72f713 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -598,6 +598,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(inalloca); KEYWORD(cold); KEYWORD(dereferenceable); + KEYWORD(dereferenceable_or_null); KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(jumptable); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 3343168..90bf17d 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -45,7 +45,6 @@ namespace llvm { public: explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C); - ~LLLexer() {} lltok::Kind Lex() { return CurKind = LexToken(); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 103c8c4..546363b 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -976,6 +976,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, break; case lltok::kw_byval: case lltok::kw_dereferenceable: + case lltok::kw_dereferenceable_or_null: case lltok::kw_inalloca: case lltok::kw_nest: case lltok::kw_noalias: @@ -1220,11 +1221,18 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; case lltok::kw_dereferenceable: { uint64_t Bytes; - if (ParseOptionalDereferenceableBytes(Bytes)) + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) return true; B.addDereferenceableAttr(Bytes); continue; } + case lltok::kw_dereferenceable_or_null: { + uint64_t Bytes; + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes)) + return true; + B.addDereferenceableOrNullAttr(Bytes); + continue; + } case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: 
B.addAttribute(Attribute::InReg); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; @@ -1284,11 +1292,18 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { return HaveError; case lltok::kw_dereferenceable: { uint64_t Bytes; - if (ParseOptionalDereferenceableBytes(Bytes)) + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) return true; B.addDereferenceableAttr(Bytes); continue; } + case lltok::kw_dereferenceable_or_null: { + uint64_t Bytes; + if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable_or_null, Bytes)) + return true; + B.addDereferenceableOrNullAttr(Bytes); + continue; + } case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; @@ -1516,12 +1531,19 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { return false; } -/// ParseOptionalDereferenceableBytes +/// ParseOptionalDerefAttrBytes /// ::= /* empty */ -/// ::= 'dereferenceable' '(' 4 ')' -bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) { +/// ::= AttrKind '(' 4 ')' +/// +/// where AttrKind is either 'dereferenceable' or 'dereferenceable_or_null'. +bool LLParser::ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, + uint64_t &Bytes) { + assert((AttrKind == lltok::kw_dereferenceable || + AttrKind == lltok::kw_dereferenceable_or_null) && + "contract!"); + Bytes = 0; - if (!EatIfPresent(lltok::kw_dereferenceable)) + if (!EatIfPresent(AttrKind)) return false; LocTy ParenLoc = Lex.getLoc(); if (!EatIfPresent(lltok::lparen)) @@ -2831,10 +2853,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { !BasePointerType->getElementType()->isSized(&Visited)) return Error(ID.Loc, "base element of getelementptr must be sized"); - if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices)) + if (!GetElementPtrInst::getIndexedType(Ty, Indices)) return Error(ID.Loc, "invalid getelementptr indices"); - ID.ConstantVal = ConstantExpr::getGetElementPtr(Elts[0], Indices, - InBounds); + ID.ConstantVal = + ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, InBounds); } else if (Opc == Instruction::Select) { if (Elts.size() != 3) return Error(ID.Loc, "expected three operands to select"); @@ -3030,13 +3052,17 @@ struct MDBoolField : public MDFieldImpl<bool> { MDBoolField(bool Default = false) : ImplTy(Default) {} }; struct MDField : public MDFieldImpl<Metadata *> { - MDField() : ImplTy(nullptr) {} + bool AllowNull; + + MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {} }; struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> { MDConstant() : ImplTy(nullptr) {} }; -struct MDStringField : public MDFieldImpl<std::string> { - MDStringField() : ImplTy(std::string()) {} +struct MDStringField : public MDFieldImpl<MDString *> { + bool AllowEmpty; + MDStringField(bool AllowEmpty = true) + : ImplTy(nullptr), AllowEmpty(AllowEmpty) {} }; struct MDFieldList : public MDFieldImpl<SmallVector<Metadata *, 4>> { MDFieldList() : ImplTy(SmallVector<Metadata *, 4>()) {} @@ -3161,7 +3187,7 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) { if (Lex.getKind() != lltok::DIFlag) return TokError("expected debug info flag"); - Val = DIDescriptor::getFlag(Lex.getStrVal()); + Val = DebugNode::getFlag(Lex.getStrVal()); if (!Val) return TokError(Twine("invalid debug info flag '") + Lex.getStrVal() + "'"); @@ -3221,6 +3247,8 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
MDBoolField &Result) { template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) { if (Lex.getKind() == lltok::kw_null) { + if (!Result.AllowNull) + return TokError("'" + Name + "' cannot be null"); Lex.Lex(); Result.assign(nullptr); return false; @@ -3246,11 +3274,15 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDConstant &Result) { template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDStringField &Result) { + LocTy ValueLoc = Lex.getLoc(); std::string S; if (ParseStringConstant(S)) return true; - Result.assign(std::move(S)); + if (!Result.AllowEmpty && S.empty()) + return Error(ValueLoc, "'" + Name + "' cannot be empty"); + + Result.assign(S.empty() ? nullptr : MDString::get(Context, S)); return false; } @@ -3343,7 +3375,7 @@ bool LLParser::ParseMDLocation(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(line, LineField, ); \ OPTIONAL(column, ColumnField, ); \ - REQUIRED(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(inlinedAt, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -3499,7 +3531,7 @@ bool LLParser::ParseMDFile(MDNode *&Result, bool IsDistinct) { bool LLParser::ParseMDCompileUnit(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ - REQUIRED(file, MDField, ); \ + REQUIRED(file, MDField, (/* AllowNull */ false)); \ OPTIONAL(producer, MDStringField, ); \ OPTIONAL(isOptimized, MDBoolField, ); \ OPTIONAL(flags, MDStringField, ); \ @@ -3567,7 +3599,7 @@ bool LLParser::ParseMDSubprogram(MDNode *&Result, bool IsDistinct) { /// ::= !MDLexicalBlock(scope: !0, file: !2, line: 7, column: 9) bool LLParser::ParseMDLexicalBlock(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(column, ColumnField, ); @@ -3583,7 +3615,7 @@ bool LLParser::ParseMDLexicalBlock(MDNode *&Result, bool IsDistinct) { /// ::= !MDLexicalBlockFile(scope: !0, file: !2, discriminator: 9) bool LLParser::ParseMDLexicalBlockFile(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(file, MDField, ); \ REQUIRED(discriminator, MDUnsignedField, (0, UINT32_MAX)); PARSE_MD_FIELDS(); @@ -3648,8 +3680,8 @@ bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) { /// declaration: !3) bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \ OPTIONAL(scope, MDField, ); \ - OPTIONAL(name, MDStringField, ); \ OPTIONAL(linkageName, MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ @@ -3670,25 +3702,23 @@ bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) { /// ParseMDLocalVariable: /// ::= !MDLocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo", -/// file: !1, line: 7, type: !2, arg: 2, flags: 7, -/// inlinedAt: !3) +/// file: !1, line: 7, type: !2, arg: 2, flags: 7) bool LLParser::ParseMDLocalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(tag, DwarfTagField, ); \ - OPTIONAL(scope, MDField, ); \ + REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(name, 
MDStringField, ); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(type, MDField, ); \ OPTIONAL(arg, MDUnsignedField, (0, UINT8_MAX)); \ - OPTIONAL(flags, DIFlagField, ); \ - OPTIONAL(inlinedAt, MDField, ); + OPTIONAL(flags, DIFlagField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT( - MDLocalVariable, (Context, tag.Val, scope.Val, name.Val, file.Val, - line.Val, type.Val, arg.Val, flags.Val, inlinedAt.Val)); + Result = GET_OR_DISTINCT(MDLocalVariable, + (Context, tag.Val, scope.Val, name.Val, file.Val, + line.Val, type.Val, arg.Val, flags.Val)); return false; } @@ -5130,10 +5160,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // If RetType is a non-function pointer type, then this is the short syntax // for the call, which means that RetType is just the return type. Infer the // rest of the function argument types from the arguments that are present. - PointerType *PFTy = nullptr; - FunctionType *Ty = nullptr; - if (!(PFTy = dyn_cast<PointerType>(RetType)) || - !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) { + FunctionType *Ty = dyn_cast<FunctionType>(RetType); + if (!Ty) { // Pull out the types of all of the arguments... std::vector<Type*> ParamTypes; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) @@ -5143,12 +5171,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(RetTypeLoc, "Invalid result type for LLVM function"); Ty = FunctionType::get(RetType, ParamTypes, false); - PFTy = PointerType::getUnqual(Ty); } // Look up the callee. Value *Callee; - if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; + if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS)) + return true; // Set up the Attribute for the function. SmallVector<AttributeSet, 8> Attrs; @@ -5269,7 +5297,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Lex.Lex(); } - Type *Ty = nullptr; + Type *Ty; LocTy ExplicitTypeLoc = Lex.getLoc(); if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || @@ -5278,8 +5306,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; - if (!Val->getType()->isPointerTy() || - !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType()) + if (!Val->getType()->isPointerTy() || !Ty->isFirstClassType()) return Error(Loc, "load operand must be a pointer to a first class type"); if (isAtomic && !Alignment) return Error(Loc, "atomic load must have explicit non-zero alignment"); @@ -5290,7 +5317,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { return Error(ExplicitTypeLoc, "explicit pointee type doesn't match operand's pointee type"); - Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope); + Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope); return AteExtraComma ? 
InstExtraComma : InstNormal; } @@ -5519,7 +5546,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { !BasePointerType->getElementType()->isSized(&Visited)) return Error(Loc, "base element of getelementptr must be sized"); - if (!GetElementPtrInst::getIndexedType(BaseType, Indices)) + if (!GetElementPtrInst::getIndexedType( + cast<PointerType>(BaseType->getScalarType())->getElementType(), + Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ty, Ptr, Indices); if (InBounds) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 5e92e57..117cdcb 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -223,7 +223,7 @@ namespace llvm { bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass); bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); - bool ParseOptionalDereferenceableBytes(uint64_t &Bytes); + bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, AtomicOrdering &Ordering); bool ParseOrdering(AtomicOrdering &Ordering); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index a7aa17c..2bdc53b 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -106,6 +106,7 @@ namespace lltok { kw_inalloca, kw_cold, kw_dereferenceable, + kw_dereferenceable_or_null, kw_inlinehint, kw_inreg, kw_jumptable, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 84753ff..5366f5f 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -16,6 +16,7 @@ #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" @@ -218,6 +219,8 @@ class BitcodeReader : public GVMaterializer { /// True if any Metadata block has been materialized. bool IsMetadataMaterialized; + bool StripDebugInfo = false; + public: std::error_code Error(BitcodeError E, const Twine &Message); std::error_code Error(BitcodeError E); @@ -227,7 +230,7 @@ public: DiagnosticHandlerFunction DiagnosticHandler); explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler); - ~BitcodeReader() { FreeState(); } + ~BitcodeReader() override { FreeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -255,6 +258,8 @@ public: /// Materialize any deferred Metadata block. 
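To see where the explicit-pointee-type plumbing in the parser hunks above is headed, here is a minimal sketch of creating a load through the typed LoadInst constructor that ParseLoad now calls; the helper name and the free-standing setup are assumptions for illustration, not part of the patch:

#include "llvm/IR/Instructions.h"
#include <cassert>

// Hypothetical helper mirroring the constructor call in ParseLoad above:
// the result type Ty is spelled explicitly instead of being derived from
// the pointer operand's pointee type.
static llvm::LoadInst *emitTypedLoad(llvm::Type *Ty, llvm::Value *Ptr) {
  assert(Ptr->getType()->isPointerTy() && Ty->isFirstClassType());
  // The caller is expected to insert the new instruction into a block.
  return new llvm::LoadInst(Ty, Ptr, "", /*isVolatile=*/false, /*Align=*/0,
                            llvm::NotAtomic, llvm::CrossThread);
}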
std::error_code materializeMetadata() override; + void setStripDebugInfo() override; + private: std::vector<StructType *> IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); @@ -1093,6 +1098,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::NonNull; case bitc::ATTR_KIND_DEREFERENCEABLE: return Attribute::Dereferenceable; + case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL: + return Attribute::DereferenceableOrNull; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: @@ -1209,6 +1216,8 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { B.addStackAlignmentAttr(Record[++i]); else if (Kind == Attribute::Dereferenceable) B.addDereferenceableAttr(Record[++i]); + else if (Kind == Attribute::DereferenceableOrNull) + B.addDereferenceableOrNullAttr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); @@ -1906,7 +1915,8 @@ std::error_code BitcodeReader::ParseMetadata() { break; } case bitc::METADATA_LOCAL_VAR: { - if (Record.size() != 10) + // 10th field is for the obsoleted 'inlinedAt:' field. + if (Record.size() != 9 && Record.size() != 10) return Error("Invalid record"); MDValueList.AssignValue( @@ -1914,7 +1924,7 @@ (Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], getMDOrNull(Record[6]), Record[7], - Record[8], getMDOrNull(Record[9]))), + Record[8])), NextMDValueNo++); break; } @@ -2308,14 +2318,17 @@ std::error_code BitcodeReader::ParseConstants() { Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); } - ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); - V = ConstantExpr::getGetElementPtr(Elts[0], Indices, - BitCode == - bitc::CST_CODE_CE_INBOUNDS_GEP); if (PointeeType && - PointeeType != cast<GEPOperator>(V)->getSourceElementType()) + PointeeType != + cast<SequentialType>(Elts[0]->getType()->getScalarType()) + ->getElementType()) return Error("Explicit gep operator type does not match pointee type " "of pointer operand"); + + ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); + V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, + BitCode == + bitc::CST_CODE_CE_INBOUNDS_GEP); break; } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] @@ -2609,6 +2622,8 @@ std::error_code BitcodeReader::materializeMetadata() { return std::error_code(); } +void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } + /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. @@ -3053,8 +3068,12 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M, // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { - if (Stream.AtEndOfStream()) - return std::error_code(); + if (Stream.AtEndOfStream()) { + if (TheModule) + return std::error_code(); + // We didn't really read a proper Module. + return Error("Malformed IR file"); + } BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); @@ -4305,6 +4324,9 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { return EC; F->setIsMaterializable(false); + if (StripDebugInfo) + stripDebugInfo(*F); + // Upgrade any old intrinsic calls in the function.
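With the lexer, parser, and bitcode-reader support for dereferenceable_or_null visible above (and the writer side just below), a short sketch of how an API client might attach the new attribute to a parameter; the helper name and the byte count are illustrative assumptions:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Mark the first parameter as either null or pointing to at least 8
// dereferenceable bytes.
static void addDerefOrNull(llvm::Function &F) {
  llvm::AttrBuilder B;
  B.addDereferenceableOrNullAttr(8);
  // Attribute index 0 is the return value; parameters start at index 1.
  F.addAttributes(1, llvm::AttributeSet::get(F.getContext(), 1, B));
}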
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 0123fb2..aa4a6a4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -200,6 +200,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NON_NULL; case Attribute::Dereferenceable: return bitc::ATTR_KIND_DEREFERENCEABLE; + case Attribute::DereferenceableOrNull: + return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: @@ -821,7 +823,7 @@ static void WriteMDSubrange(const MDSubrange *N, const ValueEnumerator &, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getCount()); - Record.push_back(rotateSign(N->getLo())); + Record.push_back(rotateSign(N->getLowerBound())); Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev); Record.clear(); @@ -892,10 +894,10 @@ static void WriteMDCompositeType(const MDCompositeType *N, Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); - Record.push_back(VE.getMetadataOrNullID(N->getElements())); + Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Record.push_back(N->getRuntimeLang()); Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder())); - Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams())); + Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier())); Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev); @@ -909,7 +911,7 @@ static void WriteMDSubroutineType(const MDSubroutineType *N, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getFlags()); - Record.push_back(VE.getMetadataOrNullID(N->getTypeArray())); + Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get())); Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev); Record.clear(); @@ -940,11 +942,11 @@ static void WriteMDCompileUnit(const MDCompileUnit *N, Record.push_back(N->getRuntimeVersion()); Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename())); Record.push_back(N->getEmissionKind()); - Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes())); - Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes())); - Record.push_back(VE.getMetadataOrNullID(N->getSubprograms())); - Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables())); - Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities())); + Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get())); + Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get())); + Record.push_back(VE.getMetadataOrNullID(N->getSubprograms().get())); + Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); + Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); @@ -970,10 +972,10 @@ static void WriteMDSubprogram(const MDSubprogram *N, Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); Record.push_back(N->isOptimized()); - Record.push_back(VE.getMetadataOrNullID(N->getFunction())); - Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams())); + Record.push_back(VE.getMetadataOrNullID(N->getRawFunction())); + 
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); - Record.push_back(VE.getMetadataOrNullID(N->getVariables())); + Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); @@ -1064,7 +1066,7 @@ static void WriteMDGlobalVariable(const MDGlobalVariable *N, Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->isLocalToUnit()); Record.push_back(N->isDefinition()); - Record.push_back(VE.getMetadataOrNullID(N->getVariable())); + Record.push_back(VE.getMetadataOrNullID(N->getRawVariable())); Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration())); Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev); @@ -1085,7 +1087,6 @@ static void WriteMDLocalVariable(const MDLocalVariable *N, Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->getArg()); Record.push_back(N->getFlags()); - Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt())); Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev); Record.clear(); @@ -2047,6 +2048,9 @@ static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, BitstreamWriter &Stream) { + assert(VE.shouldPreserveUseListOrder() && + "Expected to be preserving use-list order"); + auto hasMore = [&]() { return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; }; @@ -2089,7 +2093,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, bool NeedsMetadataAttachment = false; - DebugLoc LastDL; + MDLocation *LastDL = nullptr; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -2104,26 +2108,22 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); // If the instruction has a debug location, emit it. - DebugLoc DL = I->getDebugLoc(); - if (DL.isUnknown()) { - // nothing todo. - } else if (DL == LastDL) { + MDLocation *DL = I->getDebugLoc(); + if (!DL) + continue; + + if (DL == LastDL) { // Just repeat the same debug loc as last time. Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); - } else { - MDNode *Scope, *IA; - DL.getScopeAndInlinedAt(Scope, IA, I->getContext()); - assert(Scope && "Expected valid scope"); - - Vals.push_back(DL.getLine()); - Vals.push_back(DL.getCol()); - Vals.push_back(VE.getMetadataOrNullID(Scope)); - Vals.push_back(VE.getMetadataOrNullID(IA)); - Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); - Vals.clear(); - - LastDL = DL; + continue; } + + Vals.push_back(DL->getLine()); + Vals.push_back(DL->getColumn()); + Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); + Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); + Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); + Vals.clear(); } // Emit names for all the instructions etc. @@ -2131,7 +2131,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, if (NeedsMetadataAttachment) WriteMetadataAttachment(F, VE, Stream); - if (shouldPreserveBitcodeUseListOrder()) + if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); @@ -2313,7 +2313,8 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { } /// WriteModule - Emit the specified module to the bitstream. 
-static void WriteModule(const Module *M, BitstreamWriter &Stream) { +static void WriteModule(const Module *M, BitstreamWriter &Stream, + bool ShouldPreserveUseListOrder) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector<unsigned, 1> Vals; @@ -2322,7 +2323,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // Analyze the module, enumerating globals, functions, etc. - ValueEnumerator VE(*M); + ValueEnumerator VE(*M, ShouldPreserveUseListOrder); // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); @@ -2355,7 +2356,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); // Emit module-level use-lists. - if (shouldPreserveBitcodeUseListOrder()) + if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(nullptr, VE, Stream); // Emit function bodies. @@ -2441,7 +2442,8 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer, /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. -void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { +void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, + bool ShouldPreserveUseListOrder) { SmallVector<char, 0> Buffer; Buffer.reserve(256*1024); @@ -2464,7 +2466,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { Stream.Emit(0xD, 4); // Emit the module. - WriteModule(M, Stream); + WriteModule(M, Stream, ShouldPreserveUseListOrder); } if (TT.isOSDarwin()) diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 25456a4..3165743 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -19,22 +19,25 @@ using namespace llvm; PreservedAnalyses BitcodeWriterPass::run(Module &M) { - WriteBitcodeToFile(&M, OS); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); return PreservedAnalyses::all(); } namespace { class WriteBitcodePass : public ModulePass { raw_ostream &OS; // raw_ostream to print on + bool ShouldPreserveUseListOrder; + public: static char ID; // Pass identification, replacement for typeid - explicit WriteBitcodePass(raw_ostream &o) - : ModulePass(ID), OS(o) {} + explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder) + : ModulePass(ID), OS(o), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} const char *getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - WriteBitcodeToFile(&M, OS); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); return false; } }; @@ -42,6 +45,7 @@ namespace { char WriteBitcodePass::ID = 0; -ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) { - return new WriteBitcodePass(Str); +ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str, + bool ShouldPreserveUseListOrder) { + return new WriteBitcodePass(Str, ShouldPreserveUseListOrder); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 549e94f..7f576d7 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -283,9 +283,11 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) { return V.first->getType()->isIntOrIntVectorTy(); } -ValueEnumerator::ValueEnumerator(const Module &M) - : HasMDString(false), HasMDLocation(false), HasGenericDebugNode(false) { - if 
(shouldPreserveBitcodeUseListOrder()) +ValueEnumerator::ValueEnumerator(const Module &M, + bool ShouldPreserveUseListOrder) + : HasMDString(false), HasMDLocation(false), HasGenericDebugNode(false), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { + if (ShouldPreserveUseListOrder) UseListOrders = predictUseListOrder(M); // Enumerate the global variables. @@ -373,12 +375,10 @@ ValueEnumerator::ValueEnumerator(const Module &M) for (unsigned i = 0, e = MDs.size(); i != e; ++i) EnumerateMetadata(MDs[i].second); - if (!I.getDebugLoc().isUnknown()) { - MDNode *Scope, *IA; - I.getDebugLoc().getScopeAndInlinedAt(Scope, IA, I.getContext()); - if (Scope) EnumerateMetadata(Scope); - if (IA) EnumerateMetadata(IA); - } + // Don't enumerate the location directly -- it has a special record + // type -- but enumerate its operands. + if (MDLocation *L = I.getDebugLoc()) + EnumerateMDNodeOperands(L); } } @@ -463,7 +463,7 @@ void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map, void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { if (CstStart == CstEnd || CstStart+1 == CstEnd) return; - if (shouldPreserveBitcodeUseListOrder()) + if (ShouldPreserveUseListOrder) // Optimizing constants makes the use-list order difficult to predict. // Disable it for now when trying to preserve the order. return; diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index b94c370..ba245a3 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -67,6 +67,7 @@ private: bool HasMDString; bool HasMDLocation; bool HasGenericDebugNode; + bool ShouldPreserveUseListOrder; typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType; AttributeGroupMapType AttributeGroupMap; @@ -102,7 +103,7 @@ private: ValueEnumerator(const ValueEnumerator &) = delete; void operator=(const ValueEnumerator &) = delete; public: - ValueEnumerator(const Module &M); + ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder); void dump() const; void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; @@ -123,6 +124,8 @@ public: bool hasMDLocation() const { return HasMDLocation; } bool hasGenericDebugNode() const { return HasGenericDebugNode; } + bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; } + unsigned getTypeID(Type *T) const { TypeMapType::const_iterator I = TypeMap.find(T); assert(I != TypeMap.end() && "Type not in ValueEnumerator!"); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 12cf95b..f9544dd 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -127,7 +127,7 @@ class RegisterClassInfo; AggressiveAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector& CriticalPathRCs); - ~AggressiveAntiDepBreaker(); + ~AggressiveAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. 
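Taken together, the writer-side hunks above replace the global shouldPreserveBitcodeUseListOrder() flag with a ShouldPreserveUseListOrder parameter threaded from the public entry points through WriteModule into the ValueEnumerator. A minimal usage sketch, with the output filename assumed:

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

// Serialize M and ask the writer to emit use-list order blocks, so that
// reloading the bitcode reproduces the order of each value's uses.
static std::error_code emitBitcode(const llvm::Module *M) {
  std::error_code EC;
  llvm::raw_fd_ostream OS("out.bc", EC, llvm::sys::fs::F_None);
  if (EC)
    return EC;
  llvm::WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/true);
  return std::error_code();
}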
void StartBlock(MachineBasicBlock *BB) override; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 07d6731..43d7a38 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -28,7 +28,7 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Mangler.h" @@ -671,17 +671,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << "DEBUG_VALUE: "; DIVariable V = MI->getDebugVariable(); - if (V.getContext().isSubprogram()) { - StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + if (auto *SP = dyn_cast<MDSubprogram>(V->getScope())) { + StringRef Name = SP->getDisplayName(); if (!Name.empty()) OS << Name << ":"; } - OS << V.getName(); + OS << V->getName(); DIExpression Expr = MI->getDebugExpression(); - if (Expr.isBitPiece()) - OS << " [bit_piece offset=" << Expr.getBitPieceOffset() - << " size=" << Expr.getBitPieceSize() << "]"; + if (Expr->isBitPiece()) + OS << " [bit_piece offset=" << Expr->getBitPieceOffset() + << " size=" << Expr->getBitPieceSize() << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. @@ -1034,11 +1034,31 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + // Emit module flags. SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); if (!ModuleFlags.empty()) - getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); + TLOF.emitModuleFlags(OutStreamer, ModuleFlags, *Mang, TM); + + Triple TT(TM.getTargetTriple()); + if (TT.isOSBinFormatELF()) { + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOF.getDataRelSection()); + const DataLayout *DL = TM.getDataLayout(); + + for (const auto &Stub : Stubs) { + OutStreamer.EmitLabel(Stub.first); + OutStreamer.EmitSymbolValue(Stub.second.getPointer(), + DL->getPointerSize()); + } + } + } // Make sure we wrote out everything we need. OutStreamer.Flush(); @@ -2302,7 +2322,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); - return OutContext.GetOrCreateSymbol(NameStr.str()); + return OutContext.GetOrCreateSymbol(NameStr); } diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index bbdf237..1e3c5d7 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -33,7 +33,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; } -void DbgValueHistoryMap::startInstrRange(const MDNode *Var, +void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. 
@@ -48,7 +48,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var, Ranges.push_back(std::make_pair(&MI, nullptr)); } -void DbgValueHistoryMap::endInstrRange(const MDNode *Var, +void DbgValueHistoryMap::endInstrRange(InlinedVariable Var, const MachineInstr &MI) { auto &Ranges = VarInstrRanges[Var]; // Verify that the current instruction range is not yet closed. @@ -59,7 +59,7 @@ void DbgValueHistoryMap::endInstrRange(const MDNode *Var, Ranges.back().second = &MI; } -unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { +unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { const auto &I = VarInstrRanges.find(Var); if (I == VarInstrRanges.end()) return 0; @@ -71,12 +71,13 @@ unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { namespace { // Maps physreg numbers to the variables they describe. -typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap; +typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; +typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap; } // \brief Claim that @Var is not described by @RegNo anymore. -static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { const auto &I = RegVars.find(RegNo); assert(RegNo != 0U && I != RegVars.end()); auto &VarSet = I->second; @@ -89,8 +90,8 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, } // \brief Claim that @Var is now described by @RegNo. -static void addRegDescribedVar(RegDescribedVarsMap &RegVars, - unsigned RegNo, const MDNode *Var) { +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { assert(RegNo != 0U); auto &VarSet = RegVars[RegNo]; assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); @@ -203,7 +204,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // Use the base variable (without any DW_OP_piece expressions) // as index into History. The full variables including the // piece expressions are attached to the MI. 
- DIVariable Var = MI.getDebugVariable(); + MDLocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); if (unsigned PrevReg = Result.getRegisterForVar(Var)) dropRegDescribedVar(RegVars, PrevReg, Var); diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 4b62007..c25aaff 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -17,7 +17,8 @@ namespace llvm { class MachineFunction; class MachineInstr; -class MDNode; +class MDLocalVariable; +class MDLocation; class TargetRegisterInfo; // For each user variable, keep a list of instruction ranges where this variable @@ -31,16 +32,19 @@ class DbgValueHistoryMap { public: typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; typedef SmallVector<InstrRange, 4> InstrRanges; - typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap; + typedef std::pair<const MDLocalVariable *, const MDLocation *> + InlinedVariable; + typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap; + private: InstrRangesMap VarInstrRanges; public: - void startInstrRange(const MDNode *Var, const MachineInstr &MI); - void endInstrRange(const MDNode *Var, const MachineInstr &MI); + void startInstrRange(InlinedVariable Var, const MachineInstr &MI); + void endInstrRange(InlinedVariable Var, const MachineInstr &MI); // Returns register currently describing @Var. If @Var is currently // inaccessible or is not described by a register, returns 0. - unsigned getRegisterForVar(const MDNode *Var) const; + unsigned getRegisterForVar(InlinedVariable Var) const; bool empty() const { return VarInstrRanges.empty(); } void clear() { VarInstrRanges.clear(); } diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 6914bbe..4f6714e 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -42,8 +42,8 @@ public: } Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { - assert(DIVariable(Var).Verify()); - assert(DIExpression(Expr)->isValid()); + assert(isa<MDLocalVariable>(Var)); + assert(cast<MDExpression>(Expr)->isValid()); } /// The variable to which this location entry corresponds. @@ -74,10 +74,11 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - const MDNode *getVariableNode() const { return Variable; } - DIVariable getVariable() const { return DIVariable(Variable); } - bool isBitPiece() const { return getExpression().isBitPiece(); } - DIExpression getExpression() const { return DIExpression(Expression); } + DIVariable getVariable() const { return cast<MDLocalVariable>(Variable); } + bool isBitPiece() const { return getExpression()->isBitPiece(); } + DIExpression getExpression() const { + return cast_or_null<MDExpression>(Expression); + } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); }; @@ -101,12 +102,12 @@ public: /// Return true if the merge was successful.
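One consequence of the InlinedVariable typedef above deserves a note: keying the history map on a (variable, inlined-at) pair gives each inlined instance of the same source variable its own instruction ranges. A small sketch of why the keys stay distinct; the argument values are hypothetical:

#include "llvm/IR/DebugInfoMetadata.h"
#include <cassert>
#include <utility>

// Mirrors DbgValueHistoryMap::InlinedVariable from the header above.
typedef std::pair<const llvm::MDLocalVariable *, const llvm::MDLocation *>
    InlinedVariable;

static void checkDistinctKeys(const llvm::MDLocalVariable *Var,
                              const llvm::MDLocation *CallSite) {
  InlinedVariable OutOfLine(Var, nullptr); // the non-inlined instance
  InlinedVariable Inlined(Var, CallSite);  // an instance inlined at CallSite
  // std::pair compares element-wise, so the same variable inlined at
  // different call sites maps to different history entries.
  assert((CallSite == nullptr || OutOfLine != Inlined) && "distinct keys");
}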
bool MergeValues(const DebugLocEntry &Next) { if (Begin == Next.Begin) { - DIExpression Expr(Values[0].Expression); - DIVariable Var(Values[0].Variable); - DIExpression NextExpr(Next.Values[0].Expression); - DIVariable NextVar(Next.Values[0].Variable); - if (Var == NextVar && Expr.isBitPiece() && - NextExpr.isBitPiece()) { + DIExpression Expr = cast_or_null<MDExpression>(Values[0].Expression); + DIVariable Var = cast_or_null<MDLocalVariable>(Values[0].Variable); + DIExpression NextExpr = + cast_or_null<MDExpression>(Next.Values[0].Expression); + DIVariable NextVar = cast_or_null<MDLocalVariable>(Next.Values[0].Variable); + if (Var == NextVar && Expr->isBitPiece() && NextExpr->isBitPiece()) { addValues(Next.Values); End = Next.End; return true; @@ -189,8 +190,8 @@ inline bool operator==(const DebugLocEntry::Value &A, /// \brief Compare two pieces based on their offset. inline bool operator<(const DebugLocEntry::Value &A, const DebugLocEntry::Value &B) { - return A.getExpression().getBitPieceOffset() < - B.getExpression().getBitPieceOffset(); + return A.getExpression()->getBitPieceOffset() < + B.getExpression()->getBitPieceOffset(); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index eee5fc5..75d3b68 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -101,51 +101,52 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { if (DIE *Die = getDIE(GV)) return Die; - assert(GV.isGlobalVariable()); + assert(GV); - DIScope GVContext = GV.getContext(); - DIType GTy = DD->resolve(GV.getType()); + DIScope GVContext = GV->getScope(); + DIType GTy = DD->resolve(GV->getType()); // Construct the context before querying for the existence of the DIE in // case such construction creates the DIE. DIE *ContextDIE = getOrCreateContextDIE(GVContext); // Add to map. - DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); + DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); DIScope DeclContext; - if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) { - DeclContext = resolve(SDMDecl.getContext()); - assert(SDMDecl.isStaticMember() && "Expected static member decl"); - assert(GV.isDefinition()); + if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl->getScope()); + assert(SDMDecl->isStaticMember() && "Expected static member decl"); + assert(GV->isDefinition()); // We need the declaration DIE that is in the static member's class. DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); } else { - DeclContext = GV.getContext(); + DeclContext = GV->getScope(); // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); addType(*VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) + if (!GV->isLocalToUnit()) addFlag(*VariableDIE, dwarf::DW_AT_external); // Add line number info. addSourceLine(*VariableDIE, GV); } - if (!GV.isDefinition()) + if (!GV->isDefinition()) addFlag(*VariableDIE, dwarf::DW_AT_declaration); + else + addGlobalName(GV->getName(), *VariableDIE, DeclContext); // Add location. 
bool addToAccelTable = false; - bool isGlobalVariable = GV.getGlobal() != nullptr; - if (isGlobalVariable) { + if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { addToAccelTable = true; DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); - if (GV.getGlobal()->isThreadLocal()) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { // FIXME: Make this work with -gsplit-dwarf. unsigned PointerSize = Asm->getDataLayout().getPointerSize(); assert((PointerSize == 4 || PointerSize == 8) && @@ -174,11 +175,11 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { } addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - addLinkageName(*VariableDIE, GV.getLinkageName()); + addLinkageName(*VariableDIE, GV->getLinkageName()); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV->getVariable())) { addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) { + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { addToAccelTable = true; // GV is a merged global. DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -195,15 +196,14 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { } if (addToAccelTable) { - DD->addAccelName(GV.getName(), *VariableDIE); + DD->addAccelName(GV->getName(), *VariableDIE); // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - DD->addAccelName(GV.getLinkageName(), *VariableDIE); + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName()) + DD->addAccelName(GV->getLinkageName(), *VariableDIE); } - addGlobalName(GV.getName(), *VariableDIE, DeclContext); return VariableDIE; } @@ -307,7 +307,7 @@ void DwarfCompileUnit::constructScopeDIE( DIScope DS(Scope->getScopeNode()); - assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + assert((Scope->getInlinedAt() || !isa<MDSubprogram>(DS)) && "Only handle inlined subprograms here, use " "constructSubprogramScopeDIE for non-inlined " "subprograms"); @@ -318,7 +318,7 @@ void DwarfCompileUnit::constructScopeDIE( // avoid creating un-used children then removing them later when we find out // the scope DIE is null. std::unique_ptr<DIE> ScopeDIE; - if (Scope->getParent() && DS.isSubprogram()) { + if (Scope->getParent() && isa<MDSubprogram>(DS)) { ScopeDIE = constructInlinedScopeDIE(Scope); if (!ScopeDIE) return; @@ -340,7 +340,7 @@ void DwarfCompileUnit::constructScopeDIE( // There is no need to emit empty lexical block DIE. for (const auto &E : DD->findImportedEntitiesForScope(DS)) Children.push_back( - constructImportedEntityDIE(DIImportedEntity(E.second))); + constructImportedEntityDIE(cast<MDImportedEntity>(E.second))); } // If there are only other scopes as children, put them directly in the @@ -431,10 +431,10 @@ DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); // Add the call site information to the DIE. 
- DILocation DL(Scope->getInlinedAt()); + const MDLocation *IA = Scope->getInlinedAt(); addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); - addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); // Add name to the name table; we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -523,7 +523,7 @@ DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); - DwarfExpr.AddExpression(Expr->begin(), Expr->end()); + DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); ++Expr; } addBlock(*VariableDie, dwarf::DW_AT_location, Loc); @@ -562,16 +562,14 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - DISubprogram Sub(Scope->getScopeNode()); - - assert(Sub.isSubprogram()); + DISubprogram Sub = cast<MDSubprogram>(Scope->getScopeNode()); DD->getProcessedSPNodes().insert(Sub); DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); // If this is a variadic function, add an unspecified parameter. - DITypeArray FnArgs = Sub.getType().getTypeArray(); + DITypeArray FnArgs = Sub->getType()->getTypeArray(); // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a @@ -582,8 +580,7 @@ void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { // If we have a single element of null, it is a function that returns void. // If we have more than one element and the last one is null, it is a // variadic function. - if (FnArgs.getNumElements() > 1 && - !FnArgs.getElement(FnArgs.getNumElements() - 1) && + if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] && !includeMinimalInlineScopes()) ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); } @@ -607,7 +604,7 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { if (AbsDef) return; - DISubprogram SP(Scope->getScopeNode()); + DISubprogram SP = cast<MDSubprogram>(Scope->getScopeNode()); DIE *ContextDIE; @@ -617,11 +614,11 @@ // the important distinction that the DIDescriptor is not associated with the // DIE (since the DIDescriptor will be associated with the concrete DIE, if // any). It could be refactored to some common utility function. - else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + else if (auto *SPDecl = SP->getDeclaration()) { ContextDIE = &getUnitDie(); getOrCreateSubprogramDIE(SPDecl); } else - ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + ContextDIE = getOrCreateContextDIE(resolve(SP->getScope())); // Passing null as the associated DIDescriptor because the abstract definition // shouldn't be found by lookup.
@@ -637,28 +634,25 @@ DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { std::unique_ptr<DIE> DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) { - assert(Module.Verify() && - "Use one of the MDNode * overloads to handle invalid metadata"); - std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag()); + std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module->getTag()); insertDIE(Module, IMDie.get()); DIE *EntityDie; - DIDescriptor Entity = resolve(Module.getEntity()); - if (Entity.isNameSpace()) - EntityDie = getOrCreateNameSpace(DINameSpace(Entity)); - else if (Entity.isSubprogram()) - EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity)); - else if (Entity.isType()) - EntityDie = getOrCreateTypeDIE(DIType(Entity)); - else if (Entity.isGlobalVariable()) - EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity)); + auto *Entity = resolve(Module->getEntity()); + if (auto *NS = dyn_cast<MDNamespace>(Entity)) + EntityDie = getOrCreateNameSpace(NS); + else if (auto *SP = dyn_cast<MDSubprogram>(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast<MDType>(Entity)) + EntityDie = getOrCreateTypeDIE(T); + else if (auto *GV = dyn_cast<MDGlobalVariable>(Entity)) + EntityDie = getOrCreateGlobalVariableDIE(GV); else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module.getLineNumber(), - Module.getContext().getFilename(), - Module.getContext().getDirectory()); + addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), + Module->getScope()->getDirectory()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module.getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); @@ -683,21 +677,19 @@ void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { } } void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { - assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram"); - assert(SP.isDefinition() && + assert(SP && "CU's subprogram list contains a non-subprogram"); + assert(SP->isDefinition() && "CU's subprogram list contains a subprogram declaration"); - DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) + auto Variables = SP->getVariables(); + if (Variables.size() == 0) return; DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); if (!SPDIE) SPDIE = getDIE(SP); assert(SPDIE); - for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { - DIVariable DV(Variables.getElement(vi)); - assert(DV.isVariable()); - DbgVariable NewVar(DV, DIExpression(), DD); + for (DIVariable DV : Variables) { + DbgVariable NewVar(DV, nullptr, DIExpression(), DD); auto VariableDie = constructVariableDIE(NewVar); applyVariableAttributes(NewVar, *VariableDie); SPDIE->addChild(std::move(VariableDie)); @@ -728,7 +720,7 @@ void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die, DIScope Context) { if (includeMinimalInlineScopes()) return; - std::string FullName = getParentContextString(Context) + Ty.getName().str(); + std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; } @@ -778,7 +770,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), Location.getOffset()); if (ValidReg) - DwarfExpr.AddExpression(Expr.begin(), Expr.end()); + DwarfExpr.AddExpression(Expr->expr_op_begin(), 
Expr->expr_op_end()); } else ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); @@ -816,10 +808,10 @@ void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext()); + auto *SPDecl = SP->getDeclaration(); + DIScope Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope()); applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); - addGlobalName(SP.getName(), SPDie, Context); + addGlobalName(SP->getName(), SPDie, Context); } bool DwarfCompileUnit::isDwoUnit() const { @@ -827,7 +819,7 @@ bool DwarfCompileUnit::isDwoUnit() const { } bool DwarfCompileUnit::includeMinimalInlineScopes() const { - return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly || + return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly || (DD->useSplitDwarf() && !Skeleton); } } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e9ebd97..fb8fc6e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -129,20 +129,22 @@ bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. -template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const { +template <typename T> T *DbgVariable::resolve(TypedDebugNodeRef<T> Ref) const { return DD->resolve(Ref); } bool DbgVariable::isBlockByrefVariable() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); + assert(Var && "Invalid complex DbgVariable!"); + return Var->getType() + .resolve(DD->getTypeIdentifierMap()) + ->isBlockByrefStruct(); } DIType DbgVariable::getType() const { - DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); + MDType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. - if (Var.isBlockByrefVariable(DD->getTypeIdentifierMap())) { + if (Ty->isBlockByrefStruct()) { /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName @@ -167,17 +169,17 @@ DIType DbgVariable::getType() const { have a DW_AT_location that tells the debugger how to unwind through the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this. 
*/ - DIType subType = Ty; - uint16_t tag = Ty.getTag(); + MDType *subType = Ty; + uint16_t tag = Ty->getTag(); if (tag == dwarf::DW_TAG_pointer_type) - subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + subType = resolve(DITypeRef(cast<MDDerivedType>(Ty)->getBaseType())); - DIArray Elements = DICompositeType(subType).getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDerivedType DT(Elements.getElement(i)); - if (getName() == DT.getName()) - return (resolve(DT.getTypeDerivedFrom())); + auto Elements = cast<MDCompositeTypeBase>(subType)->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *DT = cast<MDDerivedTypeBase>(Elements[i]); + if (getName() == DT->getName()) + return resolve(DITypeRef(DT->getBaseType())); } } return Ty; @@ -275,25 +277,25 @@ static StringRef getObjCMethodName(StringRef In) { // that do not have a DW_AT_name or DW_AT_linkage_name field - this // is only slightly different than the lookup of non-standard ObjC names. void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { - if (!SP.isDefinition()) + if (!SP->isDefinition()) return; - addAccelName(SP.getName(), Die); + addAccelName(SP->getName(), Die); // If the linkage name is different than the name, go ahead and output // that as well into the name table. - if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - addAccelName(SP.getLinkageName(), Die); + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) + addAccelName(SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator // too. - if (isObjCClass(SP.getName())) { + if (isObjCClass(SP->getName())) { StringRef Class, Category; - getObjCClassCategory(SP.getName(), Class, Category); + getObjCClassCategory(SP->getName(), Class, Category); addAccelObjC(Class, Die); if (Category != "") addAccelObjC(Category, Die); // Also add the base method name to the name table. - addAccelName(getObjCMethodName(SP.getName()), Die); + addAccelName(getObjCMethodName(SP->getName()), Die); } } @@ -302,11 +304,10 @@ void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) { bool DwarfDebug::isSubprogramContext(const MDNode *Context) { if (!Context) return false; - DIDescriptor D(Context); - if (D.isSubprogram()) + if (isa<MDSubprogram>(Context)) return true; - if (D.isType()) - return isSubprogramContext(resolve(DIType(Context).getContext())); + if (auto *T = dyn_cast<MDType>(Context)) + return isSubprogramContext(resolve(T->getScope())); return false; } @@ -362,8 +363,8 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. 
DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { - StringRef FN = DIUnit.getFilename(); - CompilationDir = DIUnit.getDirectory(); + StringRef FN = DIUnit->getFilename(); + CompilationDir = DIUnit->getDirectory(); auto OwnedUnit = make_unique<DwarfCompileUnit>( InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); @@ -381,9 +382,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { Asm->OutStreamer.getContext().setMCLineTableCompilationDir( NewCU.getUniqueID(), CompilationDir); - NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); + NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer()); NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); + DIUnit->getSourceLanguage()); NewCU.addString(Die, dwarf::DW_AT_name, FN); if (!useSplitDwarf()) { @@ -397,14 +398,14 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { addGnuPubAttributes(NewCU, Die); } - if (DIUnit.isOptimized()) + if (DIUnit->isOptimized()) NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); - StringRef Flags = DIUnit.getFlags(); + StringRef Flags = DIUnit->getFlags(); if (!Flags.empty()) NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - if (unsigned RVer = DIUnit.getRunTimeVersion()) + if (unsigned RVer = DIUnit->getRuntimeVersion()) NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); @@ -420,9 +421,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N) { - DIImportedEntity Module(N); - assert(Module.Verify()); - if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) + DIImportedEntity Module = cast<MDImportedEntity>(N); + if (DIE *D = TheCU.getOrCreateContextDIE(Module->getScope())) D->addChild(TheCU.constructImportedEntityDIE(Module)); } @@ -445,44 +445,35 @@ void DwarfDebug::beginModule() { SingleCU = CU_Nodes->getNumOperands() == 1; for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit CUNode(N); + DICompileUnit CUNode = cast<MDCompileUnit>(N); DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); - DIArray ImportedEntities = CUNode.getImportedEntities(); - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedEntity(ImportedEntities.getElement(i)).getContext(), - ImportedEntities.getElement(i))); + for (auto *IE : CUNode->getImportedEntities()) + ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE)); // Stable sort to preserve the order of appearance of imported entities. // This is to avoid out-of-order processing of interdependent declarations // within the same scope, e.g. 
{ namespace A = base; namespace B = A; } std::stable_sort(ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), less_first()); - DIArray GVs = CUNode.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); - DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) { - DIType Ty(EnumTypes.getElement(i)); + for (auto *GV : CUNode->getGlobalVariables()) + CU.getOrCreateGlobalVariableDIE(GV); + for (auto *SP : CUNode->getSubprograms()) + SPMap.insert(std::make_pair(SP, &CU)); + for (DIType Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); + DIType UniqueTy = cast<MDType>(resolve(Ty->getRef())); CU.getOrCreateTypeDIE(UniqueTy); } - DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { - DIType Ty(RetainedTypes.getElement(i)); + for (DIType Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType UniqueTy(resolve(Ty.getRef())); + DIType UniqueTy = cast<MDType>(resolve(Ty->getRef())); CU.getOrCreateTypeDIE(UniqueTy); } // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i)); + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); } // Tell MMI that we have debug info. @@ -498,7 +489,8 @@ void DwarfDebug::finishVariableDefinitions() { // DIE::getUnit isn't simple - it walks parent pointers, etc. DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); assert(Unit); - DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable()); + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var->getVariable(), Var->getInlinedAt())); if (AbsVar && AbsVar->getDIE()) { Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); @@ -510,7 +502,7 @@ void DwarfDebug::finishVariableDefinitions() { void DwarfDebug::finishSubprogramDefinitions() { for (const auto &P : SPMap) forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(DISubprogram(P.first)); + CU.finishSubprogramDefinition(cast<MDSubprogram>(P.first)); }); } @@ -521,14 +513,12 @@ void DwarfDebug::collectDeadVariables() { if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit TheCU(N); + DICompileUnit TheCU = cast<MDCompileUnit>(N); // Construct subprogram DIE and add variables DIEs. 
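Why the sort above must be stable is worth spelling out: imported entities that share a scope compare equal under less_first, so a stable sort keeps their order of appearance, and "namespace B = A;" is still processed after the "namespace A = base;" it depends on. A self-contained sketch of the idea (the pair contents are stand-ins; llvm::less_first just compares the .first members):

  #include <algorithm>
  #include <utility>
  #include <vector>

  // Stand-in for llvm::less_first: order pairs by their first element only.
  struct LessFirst {
    template <typename T> bool operator()(const T &L, const T &R) const {
      return L.first < R.first;
    }
  };

  int main() {
    // (scope-id, entity): two imported entities in the same scope.
    std::vector<std::pair<int, const char *>> Ents = {
        {2, "A = base"}, {1, "other"}, {2, "B = A"}};
    // std::stable_sort groups by scope but never swaps equal keys, so the
    // dependent "B = A" still follows "A = base" after sorting.
    std::stable_sort(Ents.begin(), Ents.end(), LessFirst());
    return 0;
  }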
DwarfCompileUnit *SPCU = static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); assert(SPCU && "Unable to find Compile Unit!"); - DIArray Subprograms = TheCU.getSubprograms(); - for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { - DISubprogram SP(Subprograms.getElement(i)); + for (auto *SP : TheCU->getSubprograms()) { if (ProcessedSPNodes.count(SP) != 0) continue; SPCU->collectDeadVariables(SP); @@ -671,70 +661,71 @@ void DwarfDebug::endModule() { } // Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV, +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, DIVariable &Cleansed) { - LLVMContext &Ctx = DV->getContext(); // More than one inlined variable corresponds to one abstract variable. - // FIXME: This duplication of variables when inlining should probably be - // removed. It's done to allow each DIVariable to describe its location - // because the DebugLoc on the dbg.value/declare isn't accurate. We should - // make it accurate then remove this duplication/cleansing stuff. - Cleansed = cleanseInlinedVariable(DV, Ctx); + Cleansed = IV.first; auto I = AbstractVariables.find(Cleansed); if (I != AbstractVariables.end()) return I->second.get(); return nullptr; } -DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { DIVariable Cleansed; - return getExistingAbstractVariable(DV, Cleansed); + return getExistingAbstractVariable(IV, Cleansed); } void DwarfDebug::createAbstractVariable(const DIVariable &Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this); + auto AbsDbgVariable = + make_unique<DbgVariable>(Var, nullptr, DIExpression(), this); InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } -void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV, +void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) + DIVariable Cleansed; + if (getExistingAbstractVariable(IV, Cleansed)) return; - createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode)); + createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + cast<MDLocalScope>(ScopeNode))); } -void -DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, - const MDNode *ScopeNode) { - DIVariable Cleansed = DV; - if (getExistingAbstractVariable(DV, Cleansed)) +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( + InlinedVariable IV, const MDNode *ScopeNode) { + DIVariable Cleansed; + if (getExistingAbstractVariable(IV, Cleansed)) return; - if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode)) + if (LexicalScope *Scope = + LScopes.findAbstractScope(cast_or_null<MDLocalScope>(ScopeNode))) createAbstractVariable(Cleansed, Scope); } // Collect variable information from side table maintained by MMI.
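The InlinedVariable pair that replaces the old cleanseInlinedVariable() machinery above is easy to state on its own. A minimal sketch of the keying scheme (the type names here are stand-ins; the real typedef comes from DbgValueHistoryMap):

  #include <utility>

  struct Variable {};  // stands in for the variable metadata node
  struct Location {};  // stands in for the inlined-at MDLocation

  // One source variable inlined at two call sites yields two distinct
  // concrete variables, but both share one abstract variable: the abstract
  // table is keyed by the variable alone, with the inlined-at part dropped.
  using InlinedVariable = std::pair<const Variable *, const Location *>;

  const Variable *abstractKey(InlinedVariable IV) {
    return IV.first; // the "Cleansed" variable
  }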
void DwarfDebug::collectVariableInfoFromMMITable( - SmallPtrSetImpl<const MDNode *> &Processed) { + DenseSet<InlinedVariable> &Processed) { for (const auto &VI : MMI->getVariableDbgInfo()) { if (!VI.Var) continue; - Processed.insert(VI.Var); + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt()); + Processed.insert(Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (!Scope) continue; - DIVariable DV(VI.Var); - DIExpression Expr(VI.Expr); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - auto RegVar = make_unique<DbgVariable>(DV, Expr, this, VI.Slot); + DIExpression Expr = cast_or_null<MDExpression>(VI.Expr); + ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + auto RegVar = + make_unique<DbgVariable>(Var.first, Var.second, Expr, this, VI.Slot); if (InfoHolder.addScopeVariable(Scope, RegVar.get())) ConcreteVariables.push_back(std::move(RegVar)); } @@ -768,12 +759,12 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { /// Determine whether two variable pieces overlap. static bool piecesOverlap(DIExpression P1, DIExpression P2) { - if (!P1.isBitPiece() || !P2.isBitPiece()) + if (!P1->isBitPiece() || !P2->isBitPiece()) return true; - unsigned l1 = P1.getBitPieceOffset(); - unsigned l2 = P2.getBitPieceOffset(); - unsigned r1 = l1 + P1.getBitPieceSize(); - unsigned r2 = l2 + P2.getBitPieceSize(); + unsigned l1 = P1->getBitPieceOffset(); + unsigned l2 = P2->getBitPieceOffset(); + unsigned r1 = l1 + P1->getBitPieceSize(); + unsigned r2 = l2 + P2->getBitPieceSize(); // True where [l1,r1[ and [l2,r2[ overlap. return (l1 < r2) && (l2 < r1); } @@ -845,7 +836,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, bool couldMerge = false; // If this is a piece, it may belong to the current DebugLocEntry. - if (DIExpr.isBitPiece()) { + if (DIExpr->isBitPiece()) { // Add this value to the list of open ranges. OpenRanges.push_back(Value); @@ -884,35 +875,34 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, // Find variables for each lexical scope. -void -DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &Processed) { +void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, + DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(Processed); for (const auto &I : DbgValues) { - DIVariable DV(I.first); - if (Processed.count(DV)) + InlinedVariable IV = I.first; + if (Processed.count(IV)) continue; - // Instruction ranges, specifying where DV is accessible. + // Instruction ranges, specifying where IV is accessible. const auto &Ranges = I.second; if (Ranges.empty()) continue; LexicalScope *Scope = nullptr; - if (MDNode *IA = DV.getInlinedAt()) - Scope = LScopes.findInlinedScope(DV.getContext(), IA); + if (const MDLocation *IA = IV.second) + Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); else - Scope = LScopes.findLexicalScope(DV.getContext()); + Scope = LScopes.findLexicalScope(IV.first->getScope()); // If variable scope is not found then skip this variable.
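piecesOverlap above is the standard half-open interval test; a worked instance with arbitrary bit ranges:

  // Pieces are half-open bit ranges [offset, offset + size).
  // P1 = [0,32), P2 = [16,48):  (l1 < r2) && (l2 < r1)
  //                          -> (0 < 48) && (16 < 32) -> true: overlap.
  // P1 = [0,32), P2 = [32,64): (0 < 64) && (32 < 32) -> false: merely adjacent.
  static bool bitRangesOverlap(unsigned l1, unsigned r1,
                               unsigned l2, unsigned r2) {
    return (l1 < r2) && (l2 < r1);
  }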
if (!Scope) continue; - Processed.insert(DV); + Processed.insert(IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(IV, Scope->getScopeNode()); ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); InfoHolder.addScopeVariable(Scope, RegVar); @@ -937,16 +927,15 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, } // Collect info for variables that were optimized out. - DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV.isVariable()); - if (!Processed.insert(DV).second) + for (DIVariable DV : SP->getVariables()) { + if (!Processed.insert(InlinedVariable(DV, nullptr)).second) continue; - if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { - ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) { + ensureAbstractVariableIsCreatedIfScoped(InlinedVariable(DV, nullptr), + Scope->getScopeNode()); DIExpression NoExpr; - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this)); + ConcreteVariables.push_back( + make_unique<DbgVariable>(DV, nullptr, NoExpr, this)); InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get()); } } @@ -972,7 +961,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc) { - if (!DL.isUnknown()) { + if (DL) { unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { @@ -984,7 +973,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) Flags |= DWARF2_FLAG_IS_STMT; - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + const MDNode *Scope = DL.getScope(); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } else if (UnknownLocations) { PrevInstLoc = DL; @@ -1072,7 +1061,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - !MI.getDebugLoc().isUnknown()) { + MI.getDebugLoc()) { // Did the target forget to set the FrameSetup flag for CFI insns? assert(!MI.isCFIInstruction() && "First non-frame-setup instruction is a CFI instruction."); @@ -1137,11 +1126,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the CurrentFnBegin // label, so arguments are visible when breaking at function entry. - DIVariable DIVar(Ranges.front().first->getDebugVariable()); - if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { + DIVariable DIVar = Ranges.front().first->getDebugVariable(); + if (DIVar->getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); - if (Ranges.front().first->getDebugExpression().isBitPiece()) { + if (Ranges.front().first->getDebugExpression()->isBitPiece()) { // Mark all non-overlapping initial pieces. 
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { DIExpression Piece = I->first->getDebugExpression(); @@ -1168,15 +1157,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); - + if (MDLocation *L = PrologEndLoc) { // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + auto *SP = L->getInlinedAtScope()->getSubprogram(); + recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1199,10 +1184,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DISubprogram SP(FnScope->getScopeNode()); + DISubprogram SP = cast<MDSubprogram>(FnScope->getScopeNode()); DwarfCompileUnit &TheCU = *SPMap.lookup(SP); - SmallPtrSet<const MDNode *, 16> ProcessedVars; + DenseSet<InlinedVariable> ProcessedVars; collectVariableInfo(TheCU, SP, ProcessedVars); // Add the range of this function to the list of ranges for the CU. @@ -1210,7 +1195,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. - if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly && + if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly && LScopes.getAbstractScopesList().empty() && !IsDarwin) { assert(InfoHolder.getScopeVariables().empty()); assert(DbgValues.empty()); @@ -1229,16 +1214,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { #endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { - DISubprogram SP(AScope->getScopeNode()); - assert(SP.isSubprogram()); + DISubprogram SP = cast<MDSubprogram>(AScope->getScopeNode()); // Collect info for variables that were optimized out. 
- DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - assert(DV && DV.isVariable()); - if (!ProcessedVars.insert(DV).second) + for (DIVariable DV : SP->getVariables()) { + if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; - ensureAbstractVariableIsCreated(DV, DV.getContext()); + ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } @@ -1270,12 +1252,11 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Dir; unsigned Src = 1; unsigned Discriminator = 0; - if (DIScope Scope = DIScope(S)) { - assert(Scope.isScope()); - Fn = Scope.getFilename(); - Dir = Scope.getDirectory(); - if (Scope.isLexicalBlockFile()) - Discriminator = DILexicalBlockFile(S).getDiscriminator(); + if (auto *Scope = cast_or_null<MDScope>(S)) { + Fn = Scope->getFilename(); + Dir = Scope->getDirectory(); + if (auto *LBF = dyn_cast<MDLexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) @@ -1499,23 +1480,25 @@ static void emitDebugLocValue(const AsmPrinter &AP, Streamer); // Regular entry. if (Value.isInt()) { - DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap)); - if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || - BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + MDType *T = DV->getType().resolve(TypeIdentifierMap); + auto *B = dyn_cast<MDBasicType>(T); + if (B && (B->getEncoding() == dwarf::DW_ATE_signed || + B->getEncoding() == dwarf::DW_ATE_signed_char)) DwarfExpr.AddSignedConstant(Value.getInt()); else DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); DIExpression Expr = Value.getExpression(); - if (!Expr || (Expr.getNumElements() == 0)) + if (!Expr || !Expr->getNumElements()) // Regular entry. AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. if (Loc.getOffset()) { DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); - DwarfExpr.AddExpression(Expr.begin(), Expr.end(), PieceOffsetInBits); + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), + PieceOffsetInBits); } else DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), PieceOffsetInBits); @@ -1542,8 +1525,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, unsigned Offset = 0; for (auto Piece : Values) { DIExpression Expr = Piece.getExpression(); - unsigned PieceOffset = Expr.getBitPieceOffset(); - unsigned PieceSize = Expr.getBitPieceSize(); + unsigned PieceOffset = Expr->getBitPieceOffset(); + unsigned PieceSize = Expr->getBitPieceSize(); assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); if (Offset < PieceOffset) { // The DWARF spec seriously mandates pieces with no locations for gaps. 
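The "pieces with no locations for gaps" rule above can be made concrete: a DW_OP_bit_piece that is not preceded by a location operation describes bits that have no location at all. A sketch of the bookkeeping, mirroring the Offset/PieceOffset arithmetic in finalize (the printf calls stand in for real emission):

  #include <cstdio>

  struct Piece { unsigned OffsetInBits, SizeInBits; };

  // Walk sorted, non-overlapping pieces; whenever coverage falls behind the
  // next piece's offset, emit an "empty" piece spanning the hole.
  void describePieces(const Piece *P, unsigned N) {
    unsigned Offset = 0;
    for (unsigned i = 0; i != N; ++i) {
      if (Offset < P[i].OffsetInBits)
        std::printf("DW_OP_bit_piece %u  (gap: no location)\n",
                    P[i].OffsetInBits - Offset);
      std::printf("<location> DW_OP_bit_piece %u\n", P[i].SizeInBits);
      Offset = P[i].OffsetInBits + P[i].SizeInBits;
    }
  }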
@@ -1554,15 +1537,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, Offset += PieceOffset-Offset; } Offset += PieceSize; - -#ifndef NDEBUG - DIVariable Var = Piece.getVariable(); - unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap); - assert(PieceSize+PieceOffset <= VarSize - && "piece is larger than or outside of variable"); - assert(PieceSize != VarSize - && "piece covers entire variable"); -#endif + emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset); } } else { @@ -1850,7 +1825,7 @@ void DwarfDebug::emitDebugRanges() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode().getSplitDebugFilename()); + U.getCUNode()->getSplitDebugFilename()); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -1914,7 +1889,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { if (!useSplitDwarf()) return nullptr; if (SingleCU) - SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode().getDirectory()); + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); return &SplitTypeUnitFileTable; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 74db3ef..e067f41 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -21,6 +21,7 @@ #include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -74,7 +75,8 @@ public: /// - Variables that are described by multiple MMI table entries have multiple /// expressions and frame indices. class DbgVariable { - DIVariable Var; /// Variable Descriptor. + DIVariable Var; /// Variable Descriptor. + DILocation IA; /// Inlined at location. SmallVector<DIExpression, 1> Expr; /// Complex address location expression. DIE *TheDIE; /// Variable DIE. unsigned DotDebugLocOffset; /// Offset in DotDebugLocEntries. @@ -84,11 +86,11 @@ class DbgVariable { public: /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD, int FI = ~0) - : Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), - MInsn(nullptr), DD(DD) { + DbgVariable(DIVariable V, DILocation IA, DIExpression E, DwarfDebug *DD, + int FI = ~0) + : Var(V), IA(IA), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U), + MInsn(nullptr), DD(DD) { FrameIndex.push_back(FI); - assert(Var.Verify()); assert(!E || E->isValid()); } @@ -96,6 +98,7 @@ public: /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) : Var(DbgValue->getDebugVariable()), + IA(DbgValue->getDebugLoc()->getInlinedAt()), Expr(1, DbgValue->getDebugExpression()), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), DD(DD) { FrameIndex.push_back(~0); @@ -103,12 +106,13 @@ public: // Accessors. 
DIVariable getVariable() const { return Var; } + DILocation getInlinedAt() const { return IA; } const ArrayRef<DIExpression> getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } + StringRef getName() const { return Var->getName(); } const MachineInstr *getMInsn() const { return MInsn; } const ArrayRef<int> getFrameIndex() const { return FrameIndex; } @@ -116,6 +120,7 @@ public: assert( DotDebugLocOffset == ~0U && !MInsn && "not an MMI entry"); assert(V.DotDebugLocOffset == ~0U && !V.MInsn && "not an MMI entry"); assert(V.Var == Var && "conflicting DIVariable"); + assert(V.IA == IA && "conflicting inlined-at location"); if (V.getFrameIndex().back() != ~0) { auto E = V.getExpression(); @@ -124,40 +129,40 @@ public: FrameIndex.append(FI.begin(), FI.end()); } assert(Expr.size() > 1 - ? std::all_of(Expr.begin(), Expr.end(), - [](DIExpression &E) { return E.isBitPiece(); }) - : (true && "conflicting locations for variable")); + ? std::all_of(Expr.begin(), Expr.end(), + [](DIExpression &E) { return E->isBitPiece(); }) + : (true && "conflicting locations for variable")); } // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { - if (Var.getTag() == dwarf::DW_TAG_arg_variable) + if (Var->getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; return dwarf::DW_TAG_variable; } /// \brief Return true if DbgVariable is artificial. bool isArtificial() const { - if (Var.isArtificial()) + if (Var->isArtificial()) return true; - if (getType().isArtificial()) + if (getType()->isArtificial()) return true; return false; } bool isObjectPointer() const { - if (Var.isObjectPointer()) + if (Var->isObjectPointer()) return true; - if (getType().isObjectPointer()) + if (getType()->isObjectPointer()) return true; return false; } bool variableHasComplexAddress() const { - assert(Var.isVariable() && "Invalid complex DbgVariable!"); + assert(Var && "Invalid complex DbgVariable!"); assert(Expr.size() == 1 && "variableHasComplexAddress() invoked on multi-FI variable"); - return Expr.back().getNumElements() > 0; + return Expr.back()->getNumElements() > 0; } bool isBlockByrefVariable() const; DIType getType() const; @@ -165,7 +170,7 @@ public: private: /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const; + template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const; }; @@ -324,14 +329,16 @@ class DwarfDebug : public AsmPrinterHandler { return InfoHolder.getUnits(); } + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + /// \brief Find abstract variable associated with Var. 
- DbgVariable *getExistingAbstractVariable(const DIVariable &DV, + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, DIVariable &Cleansed); - DbgVariable *getExistingAbstractVariable(const DIVariable &DV); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope); - void ensureAbstractVariableIsCreated(const DIVariable &Var, + void ensureAbstractVariableIsCreated(InlinedVariable Var, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, + void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, const MDNode *Scope); /// \brief Construct a DIE for this abstract scope. @@ -461,7 +468,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Populate LexicalScope entries with variables' info. void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, - SmallPtrSetImpl<const MDNode *> &ProcessedVars); + DenseSet<InlinedVariable> &ProcessedVars); /// \brief Build the location list for all DBG_VALUEs in the /// function that describe the same variable. @@ -470,7 +477,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P); + void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { @@ -567,7 +574,7 @@ public: void emitDebugLocEntryLocation(const DebugLocEntry &Entry); /// Find the MDNode for the given reference. - template <typename T> T resolve(DIRef<T> Ref) const { + template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const { return Ref.resolve(TypeIdentifierMap); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 6eaf707..a4fd36f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -48,7 +48,7 @@ public: // Main entry points. // DwarfCFIException(AsmPrinter *A); - virtual ~DwarfCFIException(); + ~DwarfCFIException() override; /// Emit all exception information that should come after the content. void endModule() override; @@ -70,7 +70,7 @@ public: // Main entry points. // ARMException(AsmPrinter *A); - virtual ~ARMException(); + ~ARMException() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 489e455..e576c93 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -195,27 +195,27 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits, bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { - auto I = Expr.begin(); - auto E = Expr.end(); + auto I = Expr->expr_op_begin(); + auto E = Expr->expr_op_end(); if (I == E) return AddMachineRegPiece(MachineReg); // Pattern-match combinations for which more efficient representations exist // first. bool ValidReg = false; - switch (*I) { + switch (I->getOp()) { case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(1); - unsigned SizeInBits = I->getArg(2); + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); // Piece always comes at the end of the expression. 
return AddMachineRegPiece(MachineReg, SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } case dwarf::DW_OP_plus: { // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset]. - auto N = I->getNext(); - if ((N != E) && (*N == dwarf::DW_OP_deref)) { - unsigned Offset = I->getArg(1); + auto N = I.getNext(); + if (N != E && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = I->getArg(0); ValidReg = AddMachineRegIndirect(MachineReg, Offset); std::advance(I, 2); break; @@ -240,20 +240,20 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr, return true; } -void DwarfExpression::AddExpression(DIExpression::iterator I, - DIExpression::iterator E, +void DwarfExpression::AddExpression(MDExpression::expr_op_iterator I, + MDExpression::expr_op_iterator E, unsigned PieceOffsetInBits) { for (; I != E; ++I) { - switch (*I) { + switch (I->getOp()) { case dwarf::DW_OP_bit_piece: { - unsigned OffsetInBits = I->getArg(1); - unsigned SizeInBits = I->getArg(2); + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); break; } case dwarf::DW_OP_plus: EmitOp(dwarf::DW_OP_plus_uconst); - EmitUnsigned(I->getArg(1)); + EmitUnsigned(I->getArg(0)); break; case dwarf::DW_OP_deref: EmitOp(dwarf::DW_OP_deref); diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index 985d52c..a8b65f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -97,7 +97,8 @@ public: /// Emit a the operations remaining the DIExpressionIterator I. /// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. - void AddExpression(DIExpression::iterator I, DIExpression::iterator E, + void AddExpression(MDExpression::expr_op_iterator I, + MDExpression::expr_op_iterator E, unsigned PieceOffsetInBits = 0); }; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 60acc58e..32adb40 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -139,7 +139,7 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); // Variables with positive arg numbers are parameters. - if (unsigned ArgNum = DV.getArgNumber()) { + if (unsigned ArgNum = DV->getArg()) { // Keep all parameters in order at the start of the variable list to ensure // function types are correct (no out-of-order parameters) // @@ -149,7 +149,7 @@ bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // rather than linear search. auto I = Vars.begin(); while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable().getArgNumber(); + unsigned CurNum = (*I)->getVariable()->getArg(); // A local (non-parameter) variable has been found, insert immediately // before it. if (CurNum == 0) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index f6af73f..9154652 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -175,8 +175,8 @@ static bool isShareableAcrossCUs(DIDescriptor D) { // level already) but may be implementable for some value in projects // building multiple independent libraries with LTO and then linking those // together. 
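The pattern match at the top of AddMachineRegExpression above folds a register, a constant offset, and a dereference into one breg-style operation. A toy version of that peephole (the op encodings and containers are stand-ins, not the MDExpression iterator API):

  #include <cstdio>
  #include <vector>

  enum Op { OP_PLUS, OP_DEREF, OP_BIT_PIECE };
  struct ExprOp { Op Kind; unsigned Arg; };

  // [Reg, {OP_PLUS Off}, {OP_DEREF}] --> "DW_OP_breg<Reg> Off": the value
  // lives in memory at Reg + Off, expressed as one op instead of three.
  void lowerRegExpression(unsigned Reg, const std::vector<ExprOp> &E) {
    if (E.size() >= 2 && E[0].Kind == OP_PLUS && E[1].Kind == OP_DEREF) {
      std::printf("DW_OP_breg%u %u\n", Reg, E[0].Arg);
      return;
    }
    std::printf("DW_OP_reg%u ...\n", Reg); // fallback: no folding applies
  }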
- return (D.isType() || - (D.isSubprogram() && !DISubprogram(D).isDefinition())) && + return (isa<MDType>(D) || + (isa<MDSubprogram>(D) && !cast<MDSubprogram>(D)->isDefinition())) && !GenerateDwarfTypeUnits; } @@ -397,52 +397,48 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) { - assert(V.isVariable()); + assert(V); - addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(), - V.getContext().getDirectory()); + addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), + V->getScope()->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) { - assert(G.isGlobalVariable()); + assert(G); - addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory()); + addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) { - assert(SP.isSubprogram()); + assert(SP); - addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory()); + addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) { - assert(Ty.isType()); + assert(Ty); - addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) { - assert(Ty.isObjCProperty()); + assert(Ty); - DIFile File = Ty.getFile(); - addSourceLine(Die, Ty.getLineNumber(), File.getFilename(), - File.getDirectory()); + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } /// addSourceLine - Add location information to specified debug information /// entry. void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { - assert(NS.Verify()); - - addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); + addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); } /// addRegisterOp - Add register operand. @@ -525,28 +521,26 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, const MachineLocation &Location) { DIType Ty = DV.getType(); DIType TmpTy = Ty; - uint16_t Tag = Ty.getTag(); + uint16_t Tag = Ty->getTag(); bool isPointer = false; StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy(Ty); - TmpTy = resolve(DTy.getTypeDerivedFrom()); + DIDerivedType DTy = cast<MDDerivedType>(Ty); + TmpTy = resolve(DTy->getBaseType()); isPointer = true; } - DICompositeType blockStruct(TmpTy); - // Find the __forwarding field and the variable field in the __Block_byref // struct. 
- DIArray Fields = blockStruct.getElements(); + DIArray Fields = cast<MDCompositeTypeBase>(TmpTy)->getElements(); DIDerivedType varField; DIDerivedType forwardingField; - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDerivedType DT(Fields.getElement(i)); - StringRef fieldName = DT.getName(); + for (unsigned i = 0, N = Fields.size(); i < N; ++i) { + DIDerivedType DT = cast<MDDerivedTypeBase>(Fields[i]); + StringRef fieldName = DT->getName(); if (fieldName == "__forwarding") forwardingField = DT; else if (fieldName == varName) @@ -554,8 +548,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; - unsigned varFieldOffset = varField.getOffsetInBits() >> 2; + unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3; + unsigned varFieldOffset = varField->getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -601,9 +595,8 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, /// Return true if type encoding is unsigned. static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { - DIDerivedType DTy(Ty); - if (DTy.isDerivedType()) { - dwarf::Tag T = (dwarf::Tag)Ty.getTag(); + if (DIDerivedType DTy = dyn_cast<MDDerivedTypeBase>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. // (Pieces of) aggregate types that get hacked apart by SROA may also be @@ -624,56 +617,55 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { T == dwarf::DW_TAG_volatile_type || T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_enumeration_type); - if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + if (DITypeRef Deriv = DTy->getBaseType()) return isUnsignedDIType(DD, DD->resolve(Deriv)); // FIXME: Enums without a fixed underlying type have unknown signedness // here, leading to incorrectly emitted constants. - assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); + assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type); return false; } - DIBasicType BTy(Ty); - assert(BTy.isBasicType()); - unsigned Encoding = BTy.getEncoding(); + DIBasicType BTy = cast<MDBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); assert((Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_float || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - (Ty.getTag() == dwarf::DW_TAG_unspecified_type && - Ty.getName() == "decltype(nullptr)")) && + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && "Unsupported encoding"); - return (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || - Ty.getTag() == dwarf::DW_TAG_unspecified_type); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; } /// If this type is derived from a base type then return base type size. 
static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { - unsigned Tag = Ty.getTag(); + unsigned Tag = Ty->getTag(); if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type) - return Ty.getSizeInBits(); + return Ty->getSizeInBits(); - DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + auto *BaseType = DD->resolve(Ty->getBaseType()); - assert(BaseType.isValid() && "Unexpected invalid base type"); + assert(BaseType && "Unexpected invalid base type"); // If this is a derived type, go ahead and get the base type, unless it's a // reference then it's just the size of the field. Pointer types have no need // of this since they're a different type of qualification on the type. - if (BaseType.getTag() == dwarf::DW_TAG_reference_type || - BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) - return Ty.getSizeInBits(); + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); - if (BaseType.isDerivedType()) - return getBaseTypeSize(DD, DIDerivedType(BaseType)); + if (auto *DT = dyn_cast<MDDerivedTypeBase>(BaseType)) + return getBaseTypeSize(DD, DT); - return BaseType.getSizeInBits(); + return BaseType->getSizeInBits(); } /// addConstantFPValue - Add constant value entry in variable DIE. @@ -771,39 +763,37 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { /// addTemplateParams - Add template parameters into buffer. void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { // Add template parameters. - for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - constructTemplateTypeParameterDIE(Buffer, - DITemplateTypeParameter(Element)); - else if (Element.isTemplateValueParameter()) - constructTemplateValueParameterDIE(Buffer, - DITemplateValueParameter(Element)); + for (unsigned i = 0, e = TParams.size(); i != e; ++i) { + DIDescriptor Element = TParams[i]; + if (auto *TTP = dyn_cast<MDTemplateTypeParameter>(Element)) + constructTemplateTypeParameterDIE(Buffer, TTP); + else if (auto *TVP = dyn_cast<MDTemplateValueParameter>(Element)) + constructTemplateValueParameterDIE(Buffer, TVP); } } /// getOrCreateContextDIE - Get context owner's DIE. DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { - if (!Context || Context.isFile()) + if (!Context || isa<MDFile>(Context)) return &getUnitDie(); - if (Context.isType()) - return getOrCreateTypeDIE(DIType(Context)); - if (Context.isNameSpace()) - return getOrCreateNameSpace(DINameSpace(Context)); - if (Context.isSubprogram()) - return getOrCreateSubprogramDIE(DISubprogram(Context)); + if (auto *T = dyn_cast<MDType>(Context)) + return getOrCreateTypeDIE(T); + if (auto *NS = dyn_cast<MDNamespace>(Context)) + return getOrCreateNameSpace(NS); + if (auto *SP = dyn_cast<MDSubprogram>(Context)) + return getOrCreateSubprogramDIE(SP); return getDIE(Context); } DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { - DIScope Context = resolve(Ty.getContext()); + DIScope Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. 
- DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); constructTypeDIE(TyDIE, Ty); @@ -817,18 +807,18 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) return nullptr; - DIType Ty(TyNode); - assert(Ty.isType()); - assert(Ty == resolve(Ty.getRef()) && + auto *Ty = cast<MDType>(TyNode); + assert(Ty == resolve(Ty->getRef()) && "type was not uniqued, possible ODR violation."); // DW_TAG_restrict_type is not supported in DWARF2 - if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) - return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom())); + if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE( + resolve(DITypeRef(cast<MDDerivedType>(Ty)->getBaseType()))); // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIScope Context = resolve(Ty.getContext()); + DIScope Context = resolve(Ty->getScope()); DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); @@ -836,24 +826,22 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { return TyDIE; // Create new type. - DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); updateAcceleratorTables(Context, Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) { - DICompositeType CTy(Ty); - if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) - if (MDString *TypeId = CTy.getIdentifier()) { + if (auto *BT = dyn_cast<MDBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (DICompositeType CTy = dyn_cast<MDCompositeTypeBase>(Ty)) { + if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) + if (MDString *TypeId = CTy->getRawIdentifier()) { DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); // Skip updating the accelerator tables since this is not the full type. return &TyDIE; } constructTypeDIE(TyDIE, CTy); } else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(TyDIE, DIDerivedType(Ty)); + constructTypeDIE(TyDIE, cast<MDDerivedType>(Ty)); } return &TyDIE; @@ -861,19 +849,18 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE) { - if (!Ty.getName().empty() && !Ty.isForwardDecl()) { + if (!Ty->getName().empty() && !Ty->isForwardDecl()) { bool IsImplementation = 0; - if (Ty.isCompositeType()) { - DICompositeType CT(Ty); + if (auto *CT = dyn_cast<MDCompositeTypeBase>(Ty)) { // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); } unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; - DD->addAccelType(Ty.getName(), TyDIE, Flags); + DD->addAccelType(Ty->getName(), TyDIE, Flags); - if (!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace()) + if (!Context || isa<MDCompileUnit>(Context) || isa<MDFile>(Context) || + isa<MDNamespace>(Context)) addGlobalType(Ty, TyDIE, Context); } } @@ -914,10 +901,10 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { std::string CS; SmallVector<DIScope, 1> Parents; - while (!Context.isCompileUnit()) { + while (!isa<MDCompileUnit>(Context)) { Parents.push_back(Context); - if (Context.getContext()) - Context = resolve(Context.getContext()); + if (Context->getScope()) + Context = resolve(Context->getScope()); else // Structure, etc types will have a NULL context if they're at the top // level. @@ -929,9 +916,9 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) { - DIScope Ctx = *I; - StringRef Name = Ctx.getName(); - if (Name.empty() && Ctx.isNameSpace()) + const MDScope *Ctx = *I; + StringRef Name = Ctx->getName(); + if (Name.empty() && isa<MDNamespace>(Ctx)) Name = "(anonymous namespace)"; if (!Name.empty()) { CS += Name; @@ -944,31 +931,31 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { /// constructTypeDIE - Construct basic type die from DIBasicType. void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. - StringRef Name = BTy.getName(); + StringRef Name = BTy->getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); // An unspecified type only has a name attribute. - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) + if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) return; addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); + BTy->getEncoding()); - uint64_t Size = BTy.getSizeInBits() >> 3; + uint64_t Size = BTy->getSizeInBits() >> 3; addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; + StringRef Name = DTy->getName(); + uint64_t Size = DTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + DIType FromTy = resolve(DTy->getBaseType()); if (FromTy) addType(Buffer, FromTy); @@ -982,24 +969,25 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(Buffer, dwarf::DW_AT_containing_type, - *getOrCreateTypeDIE(resolve(DTy.getClassType()))); + addDIEEntry( + Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(cast<MDDerivedType>(DTy)->getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) + if (!DTy->isForwardDecl()) addSourceLine(Buffer, DTy); } /// constructSubprogramArguments - Construct function argument DIEs. 
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIType Ty = resolve(Args.getElement(i)); + for (unsigned i = 1, N = Args.size(); i < N; ++i) { + DIType Ty = resolve(Args[i]); if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, Ty); - if (Ty.isArtificial()) + if (Ty->isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); } } @@ -1008,9 +996,9 @@ void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { /// constructTypeDIE - Construct type DIE from DICompositeType. void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. - StringRef Name = CTy.getName(); + StringRef Name = CTy->getName(); - uint64_t Size = CTy.getSizeInBits() >> 3; + uint64_t Size = CTy->getSizeInBits() >> 3; uint16_t Tag = Buffer.getTag(); switch (Tag) { @@ -1022,14 +1010,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { break; case dwarf::DW_TAG_subroutine_type: { // Add return type. A void return won't have a type. - DITypeArray Elements = DISubroutineType(CTy).getTypeArray(); - DIType RTy(resolve(Elements.getElement(0))); - if (RTy) - addType(Buffer, RTy); + auto Elements = cast<MDSubroutineType>(CTy)->getTypeArray(); + if (Elements.size()) + if (auto RTy = resolve(Elements[0])) + addType(Buffer, RTy); bool isPrototyped = true; - if (Elements.getNumElements() == 2 && - !Elements.getElement(1)) + if (Elements.size() == 2 && !Elements[1]) isPrototyped = false; constructSubprogramArguments(Buffer, Elements); @@ -1042,60 +1029,46 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Language == dwarf::DW_LANG_ObjC)) addFlag(Buffer, dwarf::DW_AT_prototyped); - if (CTy.isLValueReference()) + if (CTy->isLValueReference()) addFlag(Buffer, dwarf::DW_AT_reference); - if (CTy.isRValueReference()) + if (CTy->isRValueReference()) addFlag(Buffer, dwarf::DW_AT_rvalue_reference); } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. 
- DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.isSubprogram()) - getOrCreateSubprogramDIE(DISubprogram(Element)); - else if (Element.isDerivedType()) { - DIDerivedType DDTy(Element); - if (DDTy.getTag() == dwarf::DW_TAG_friend) { + DIArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + DIDescriptor Element = Elements[i]; + if (!Element) + continue; + if (auto *SP = dyn_cast<MDSubprogram>(Element)) + getOrCreateSubprogramDIE(SP); + else if (DIDerivedType DDTy = dyn_cast<MDDerivedTypeBase>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_friend) { DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); - addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), - dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) { + addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); + } else if (DDTy->isStaticMember()) { getOrCreateStaticMemberDIE(DDTy); } else { constructMemberDIE(Buffer, DDTy); } - } else if (Element.isObjCProperty()) { - DIObjCProperty Property(Element); - DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer); - StringRef PropertyName = Property.getObjCPropertyName(); + } else if (DIObjCProperty Property = dyn_cast<MDObjCProperty>(Element)) { + DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer); + StringRef PropertyName = Property->getName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - if (Property.getType()) - addType(ElemDie, Property.getType()); + if (Property->getType()) + addType(ElemDie, Property->getType()); addSourceLine(ElemDie, Property); - StringRef GetterName = Property.getObjCPropertyGetterName(); + StringRef GetterName = Property->getGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); - StringRef SetterName = Property.getObjCPropertySetterName(); + StringRef SetterName = Property->getSetterName(); if (!SetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); - unsigned PropertyAttributes = 0; - if (Property.isReadOnlyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; - if (Property.isReadWriteObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; - if (Property.isAssignObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; - if (Property.isRetainObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; - if (Property.isCopyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; - if (Property.isNonAtomicObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; - if (PropertyAttributes) + if (unsigned PropertyAttributes = Property->getAttributes()) addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, PropertyAttributes); @@ -1104,28 +1077,27 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Entry = createDIEEntry(ElemDie); insertDIEEntry(Element, Entry); } - } else - continue; + } } - if (CTy.isAppleBlockExtension()) + if (CTy->isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type // inside C++ composite types to point to the base class with the vtable. 
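A concrete case for the DW_AT_containing_type note above (illustrative source, not part of the patch): in the hierarchy below, the vtable holder resolved via getVTableHolder() is Base for both classes, so GDB expects each composite DIE to carry DW_AT_containing_type pointing at Base.

  struct Base {
    virtual ~Base(); // Base introduces the vtable
  };
  struct Derived : Base {
    int Member;
  };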
- DICompositeType ContainingType(resolve(CTy.getContainingType())); - if (ContainingType) + if (DICompositeType ContainingType = + dyn_cast_or_null<MDCompositeType>(resolve(CTy->getVTableHolder()))) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, *getOrCreateTypeDIE(ContainingType)); - if (CTy.isObjcClassComplete()) + if (CTy->isObjcClassComplete()) addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. if (Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - addTemplateParams(Buffer, CTy.getTemplateParams()); + addTemplateParams(Buffer, CTy->getTemplateParams()); break; } @@ -1144,20 +1116,20 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // TODO: Do we care about size for enum forward declarations? if (Size) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); - else if (!CTy.isForwardDecl()) + else if (!CTy->isForwardDecl()) // Add zero size if it is not a forward declaration. addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. - if (CTy.isForwardDecl()) + if (CTy->isForwardDecl()) addFlag(Buffer, dwarf::DW_AT_declaration); // Add source line info if available. - if (!CTy.isForwardDecl()) + if (!CTy->isForwardDecl()) addSourceLine(Buffer, CTy); // No harm in adding the runtime language to the declaration. - unsigned RLang = CTy.getRunTimeLang(); + unsigned RLang = CTy->getRuntimeLang(); if (RLang) addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); @@ -1171,10 +1143,10 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, DIE &ParamDIE = createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); // Add the type if it exists, it could be void and therefore no type. - if (TP.getType()) - addType(ParamDIE, resolve(TP.getType())); - if (!TP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); + if (TP->getType()) + addType(ParamDIE, resolve(TP->getType())); + if (!TP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); } /// constructTemplateValueParameterDIE - Construct new DIE for the given @@ -1182,17 +1154,17 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer, void DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, DITemplateValueParameter VP) { - DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer); + DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer); // Add the type if there is one, template template and template parameter // packs will not have a type. 
- if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) - addType(ParamDIE, resolve(VP.getType())); - if (!VP.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); - if (Metadata *Val = VP.getValue()) { + if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP->getType())); + if (!VP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (Metadata *Val = VP->getValue()) { if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) - addConstantValue(ParamDIE, CI, resolve(VP.getType())); + addConstantValue(ParamDIE, CI, resolve(VP->getType())); else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) @@ -1202,14 +1174,12 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, // parameter, rather than a pointer to it. addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); addBlock(ParamDIE, dwarf::DW_AT_location, Loc); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, cast<MDString>(Val)->getString()); - } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { - assert(isa<MDNode>(Val)); - DIArray A(cast<MDNode>(Val)); - addTemplateParams(ParamDIE, A); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + addTemplateParams(ParamDIE, cast<MDTuple>(Val)); } } } @@ -1218,19 +1188,19 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *ContextDIE = getOrCreateContextDIE(NS->getScope()); if (DIE *NDie = getDIE(NS)) return NDie; DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); - StringRef Name = NS.getName(); + StringRef Name = NS->getName(); if (!Name.empty()) - addString(NDie, dwarf::DW_AT_name, NS.getName()); + addString(NDie, dwarf::DW_AT_name, NS->getName()); else Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); - addGlobalName(Name, NDie, NS.getContext()); + addGlobalName(Name, NDie, NS->getScope()); addSourceLine(NDie, NS); return &NDie; } @@ -1241,12 +1211,12 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // such construction creates the DIE (as is the case for member function // declarations). DIE *ContextDIE = - Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext())); + Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope())); if (DIE *SPDie = getDIE(SP)) return SPDie; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { if (!Minimal) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); @@ -1260,7 +1230,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // Stop here and fill this in later, depending on whether or not this // subprogram turns out to have inlined instances or not. 
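// The "construct the context before querying for the existence of the DIE"
// comment above (repeated for namespaces and subprograms) encodes a
// re-entrance guard worth spelling out. A schematic restatement with
// hypothetical names, same shape as getOrCreateNameSpace:
//
//   DIE *getOrCreateFooDIE(Foo *N) {
//     // Building the context may recursively create N's DIE...
//     DIE *ContextDIE = getOrCreateContextDIE(N->getScope());
//     // ...so the existence check must come *after* it.
//     if (DIE *Existing = getDIE(N))
//       return Existing;
//     return &createAndAddDIE(N->getTag(), *ContextDIE, N);
//   }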
- if (SP.isDefinition()) + if (SP->isDefinition()) return &SPDie; applySubprogramAttributes(SP, SPDie); @@ -1271,19 +1241,19 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + if (auto *SPDecl = SP->getDeclaration()) { DeclDie = getDIE(SPDecl); assert(DeclDie && "This DIE should've already been constructed when the " "definition DIE was created in " "getOrCreateSubprogramDIE"); - DeclLinkageName = SPDecl.getLinkageName(); + DeclLinkageName = SPDecl->getLinkageName(); } // Add function template parameters. - addTemplateParams(SPDie, SP.getTemplateParams()); + addTemplateParams(SPDie, SP->getTemplateParams()); // Add the linkage name if we have one and it isn't in the Decl. - StringRef LinkageName = SP.getLinkageName(); + StringRef LinkageName = SP->getLinkageName(); assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); @@ -1306,8 +1276,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, return; // Constructors and operators for anonymous aggregates do not have names. - if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, SP.getName()); + if (!SP->getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP->getName()); // Skip the rest of the attributes under -gmlt to save space. if (Minimal) @@ -1318,33 +1288,34 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, // Add the prototype if we have a prototype and we have a C like // language. uint16_t Language = getLanguage(); - if (SP.isPrototyped() && + if (SP->isPrototyped() && (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - DISubroutineType SPTy = SP.getType(); - assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + DISubroutineType SPTy = SP->getType(); + assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); - DITypeArray Args = SPTy.getTypeArray(); + auto Args = SPTy->getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. - if (resolve(Args.getElement(0))) - addType(SPDie, DIType(resolve(Args.getElement(0)))); + if (Args.size()) + if (auto Ty = resolve(Args[0])) + addType(SPDie, Ty); - unsigned VK = SP.getVirtuality(); + unsigned VK = SP->getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIELoc *Block = getDIELoc(); addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); ContainingTypeMap.insert( - std::make_pair(&SPDie, resolve(SP.getContainingType()))); + std::make_pair(&SPDie, resolve(SP->getContainingType()))); } - if (!SP.isDefinition()) { + if (!SP->isDefinition()) { addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments. Do not add arguments for subprogram definition. 
They will @@ -1352,35 +1323,35 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } - if (SP.isArtificial()) + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); - if (!SP.isLocalToUnit()) + if (!SP->isLocalToUnit()) addFlag(SPDie, dwarf::DW_AT_external); - if (SP.isOptimized()) + if (SP->isOptimized()) addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); if (unsigned isa = Asm->getISAEncoding()) addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); - if (SP.isLValueReference()) + if (SP->isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); - if (SP.isRValueReference()) + if (SP->isRValueReference()) addFlag(SPDie, dwarf::DW_AT_rvalue_reference); - if (SP.isProtected()) + if (SP->isProtected()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) + else if (SP->isPrivate()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (SP.isPublic()) + else if (SP->isPublic()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (SP.isExplicit()) + if (SP->isExplicit()) addFlag(SPDie, dwarf::DW_AT_explicit); } @@ -1393,9 +1364,9 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit // DW_AT_lower_bound and DW_AT_count attributes. - int64_t LowerBound = SR.getLo(); + int64_t LowerBound = SR->getLowerBound(); int64_t DefaultLowerBound = getDefaultLowerBound(); - int64_t Count = SR.getCount(); + int64_t Count = SR->getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); @@ -1420,11 +1391,11 @@ DIE *DwarfUnit::getIndexTyDie() { /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { - if (CTy.isVector()) + if (CTy->isVector()) addFlag(Buffer, dwarf::DW_AT_GNU_vector); // Emit the element type. - addType(Buffer, resolve(CTy.getTypeDerivedFrom())); + addType(Buffer, resolve(CTy->getBaseType())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1432,31 +1403,32 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { DIE *IdxTy = getIndexTyDie(); // Add subranges to array type. - DIArray Elements = CTy.getElements(); - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + DIArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + // FIXME: Should this really be such a loose cast? + if (auto *Element = dyn_cast_or_null<DebugNode>(Elements[i])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, cast<MDSubrange>(Element), IdxTy); } } /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { - DIArray Elements = CTy.getElements(); + DIArray Elements = CTy->getElements(); // Add enumerators to enumeration type. 
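// Worked example for the subrange logic above (illustrative, not verbatim
// output of this patch): for C's 'int a[10]' the default lower bound is 0,
// so only a count is emitted:
//
//   DW_TAG_array_type
//     DW_AT_type            -> "int"
//     DW_TAG_subrange_type
//       DW_AT_count         10
//
// An unbounded 'extern int b[];' has Count == -1, so neither
// DW_AT_lower_bound nor DW_AT_count is emitted, per the comment in
// constructSubrangeDIE.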
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIEnumerator Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *Enum = dyn_cast_or_null<MDEnumerator>(Elements[i]); + if (Enum) { DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); - StringRef Name = Enum.getName(); + StringRef Name = Enum->getName(); addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = Enum.getEnumValue(); + int64_t Value = Enum->getValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); } } - DIType DTy = resolve(CTy.getTypeDerivedFrom()); + DIType DTy = resolve(CTy->getBaseType()); if (DTy) { addType(Buffer, DTy); addFlag(Buffer, dwarf::DW_AT_enum_class); @@ -1481,17 +1453,20 @@ void DwarfUnit::constructContainingTypeDIEs() { } /// constructMemberDIE - Construct member DIE from DIDerivedType. -void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { - DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); - StringRef Name = DT.getName(); +void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT_) { + // Downcast to MDDerivedType. + const MDDerivedType *DT = cast<MDDerivedType>(DT_); + + DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); + StringRef Name = DT->getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, resolve(DT.getTypeDerivedFrom())); + addType(MemberDie, resolve(DT->getBaseType())); addSourceLine(MemberDie, DT); - if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { + if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. @@ -1501,14 +1476,14 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits()); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { - uint64_t Size = DT.getSizeInBits(); + uint64_t Size = DT->getSizeInBits(); uint64_t FieldSize = getBaseTypeSize(DD, DT); uint64_t OffsetInBytes; @@ -1517,8 +1492,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t Offset = DT->getOffsetInBits(); + uint64_t AlignMask = ~(DT->getAlignInBits() - 1); uint64_t HiMark = (Offset + FieldSize) & AlignMask; uint64_t FieldOffset = (HiMark - FieldSize); Offset -= FieldOffset; @@ -1533,7 +1508,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes = FieldOffset >> 3; } else // This is not a bitfield. 
- OffsetInBytes = DT.getOffsetInBits() >> 3; + OffsetInBytes = DT->getOffsetInBits() >> 3; if (DD->getDwarfVersion() <= 2) { DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); @@ -1545,49 +1520,50 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { OffsetInBytes); } - if (DT.isProtected()) + if (DT->isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (DT.isVirtual()) + if (DT->isVirtual()) addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. - if (MDNode *PNode = DT.getObjCProperty()) + if (MDNode *PNode = DT->getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, PropertyDie); - if (DT.isArtificial()) + if (DT->isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); } /// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. -DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { - if (!DT.Verify()) +DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT_) { + const MDDerivedType *DT = cast_or_null<MDDerivedType>(DT_); + if (!DT) return nullptr; // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope())); assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); if (DIE *StaticMemberDIE = getDIE(DT)) return StaticMemberDIE; - DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); - DIType Ty = resolve(DT.getTypeDerivedFrom()); + DIType Ty = resolve(DT->getBaseType()); - addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); + addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); addType(StaticMemberDIE, Ty); addSourceLine(StaticMemberDIE, DT); addFlag(StaticMemberDIE, dwarf::DW_AT_external); @@ -1595,19 +1571,19 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { // FIXME: We could omit private if the parent is a class_type, and // public if the parent is something else. 
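// Context for the FIXME above: when no accessibility attribute is present,
// DWARF consumers assume DW_ACCESS_private for members of a
// DW_TAG_class_type and DW_ACCESS_public for members of a structure or
// union, e.g.
//
//   class C { int x; };           // 'x' would need no DW_AT_accessibility
//   class D { public: int y; };   // 'y' would need DW_ACCESS_public
//
// which is why the explicit attribute could be omitted in the default cases.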
- if (DT.isProtected()) + if (DT->isProtected()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) + else if (DT->isPrivate()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else if (DT.isPublic()) + else if (DT->isPublic()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); - if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) + if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) addConstantValue(StaticMemberDIE, CI, Ty); - if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) + if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) addConstantFPValue(StaticMemberDIE, CFP); return &StaticMemberDIE; diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 81c5821..b354255 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -141,7 +141,7 @@ public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } unsigned getUniqueID() const { return UniqueID; } - uint16_t getLanguage() const { return CUNode.getLanguage(); } + uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } DICompileUnit getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } @@ -342,7 +342,7 @@ protected: /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const { + template <typename T> T *resolve(TypedDebugNodeRef<T> Ref) const { return DD->resolve(Ref); } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 14df4c9..6f64d8f 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -188,20 +188,12 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { return MarkedNoUnwind; } -/// Compute the call-site table. The entry for an invoke has a try-range -/// containing the call, a non-zero landing pad, and an appropriate action. The -/// entry for an ordinary call has a try-range containing the call and zero for -/// the landing pad and the action. Calls marked 'nounwind' have no entry and -/// must not be contained in the try-range of any entry - they form gaps in the -/// table. Entries must be ordered by try-range address. -void EHStreamer:: -computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const SmallVectorImpl<const LandingPadInfo *> &LandingPads, - const SmallVectorImpl<unsigned> &FirstActions) { +void EHStreamer::computePadMap( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap) { // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate // try-ranges for them need be deduced so we can put them in the LSDA. - RangeMapType PadMap; for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { const LandingPadInfo *LandingPad = LandingPads[i]; for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { @@ -211,6 +203,20 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, PadMap[BeginLabel] = P; } } +} + +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. 
The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); // The end label of the previous invoke or nounwind try-range. MCSymbol *LastLabel = nullptr; diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 94d0585..65973fa 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -80,13 +80,15 @@ protected: /// `false' otherwise. bool callToNoUnwindFunction(const MachineInstr *MI); + void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap); + /// Compute the call-site table. The entry for an invoke has a try-range /// containing the call, a non-zero landing pad and an appropriate action. /// The entry for an ordinary call has a try-range containing the call and /// zero for the landing pad and the action. Calls marked 'nounwind' have /// no entry and must not be contained in the try-range of any entry - they /// form gaps in the table. Entries must be ordered by try-range address. - void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, const SmallVectorImpl<const LandingPadInfo *> &LPs, const SmallVectorImpl<unsigned> &FirstActions); @@ -123,7 +125,7 @@ protected: public: EHStreamer(AsmPrinter *A); - virtual ~EHStreamer(); + ~EHStreamer() override; // Unused. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 7d76ead..f89d364 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" @@ -67,6 +68,27 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; + + // If this was an outlined handler, we need to define the label corresponding + // to the offset of the parent frame relative to the stack pointer after the + // prologue. + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + if (F != ParentF) { + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { + MCSymbol *HandlerTypeParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(F->getName())); + + // Emit a symbol assignment. + Asm->OutStreamer.EmitAssignment( + HandlerTypeParentFrameOffset, + MCConstantExpr::Create(I->second, Asm->OutContext)); + } + } + if (!shouldEmitPersonality && !shouldEmitMoves) return; @@ -82,12 +104,17 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { /// endFunction - Gather and emit post-function exception information. 
/// -void Win64Exception::endFunction(const MachineFunction *) { +void Win64Exception::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + EHPersonality Per = MMI->getPersonalityType(); + + // Get rid of any dead landing pads if we're not using a Windows EH scheme. In + // Windows EH schemes, the landing pad is not actually reachable. It only + // exists so that we can emit the right table data. + if (!isMSVCEHPersonality(Per)) + MMI->TidyLandingPads(); if (shouldEmitPersonality) { Asm->OutStreamer.PushSection(); @@ -97,9 +124,10 @@ void Win64Exception::endFunction(const MachineFunction *) { // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. - EHPersonality Per = MMI->getPersonalityType(); if (Per == EHPersonality::MSVC_Win64SEH) emitCSpecificHandlerTable(); + else if (Per == EHPersonality::MSVC_CXX) + emitCXXFrameHandler3Table(MF); else emitExceptionTable(); @@ -108,11 +136,19 @@ void Win64Exception::endFunction(const MachineFunction *) { Asm->OutStreamer.EmitWinCFIEndProc(); } -const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { +const MCExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { + if (!Value) + return MCConstantExpr::Create(0, Asm->OutContext); return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, Asm->OutContext); } +const MCExpr *Win64Exception::createImageRel32(const GlobalValue *GV) { + if (!GV) + return MCConstantExpr::Create(0, Asm->OutContext); + return createImageRel32(Asm->getSymbol(GV)); +} + /// Emit the language-specific data that __C_specific_handler expects. This /// handler lives in the x64 Microsoft C runtime and allows catching or cleaning /// up after faults with __try, __except, and __finally. The typeinfo values @@ -237,3 +273,231 @@ void Win64Exception::emitCSpecificHandlerTable() { } } } + +void Win64Exception::emitCXXFrameHandler3Table(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + const Function *ParentF = MMI->getWinEHParent(F); + auto &OS = Asm->OutStreamer; + WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + + StringRef ParentLinkageName = + GlobalValue::getRealLinkageName(ParentF->getName()); + + MCSymbol *FuncInfoXData = + Asm->OutContext.GetOrCreateSymbol(Twine("$cppxdata$", ParentLinkageName)); + OS.EmitValue(createImageRel32(FuncInfoXData), 4); + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. + SmallVector<const LandingPadInfo *, 64> LandingPads; + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. + MCSymbol *LastLabel = Asm->getFunctionBegin(); + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + int LastEHState = -2; + + // The parent function and the catch handlers contribute to the 'ip2state' + // table. 
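// The two createImageRel32 overloads above establish a small convention that
// the table emission below relies on: a null symbol or global yields a
// literal 0 instead of an image-relative relocation, encoding "absent"
// optional fields (e.g. an empty UnwindMap). Restated as a sketch
// ('imageRel32OrZero' is a hypothetical free-function form; 'Ctx' is
// supplied by the caller):
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

static const MCExpr *imageRel32OrZero(const MCSymbol *Sym, MCContext &Ctx) {
  if (!Sym)
    return MCConstantExpr::Create(0, Ctx);           // absent field -> 0
  return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_COFF_IMGREL32, Ctx);
}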
+ for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + if (SawPotentiallyThrowing) { + FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); + SawPotentiallyThrowing = false; + LastEHState = -1; + } + + if (LandingPad->WinEHState != LastEHState) + FuncInfo.IPToStateList.push_back( + std::make_pair(BeginLabel, LandingPad->WinEHState)); + LastEHState = LandingPad->WinEHState; + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + } + } + + // Defer emission until we've visited the parent function and all the catch + // handlers. Cleanups don't contribute to the ip2state table yet, so don't + // count them. + if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) + return; + ++FuncInfo.NumIPToStateFuncsVisited; + if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) + return; + + MCSymbol *UnwindMapXData = nullptr; + MCSymbol *TryBlockMapXData = nullptr; + MCSymbol *IPToStateXData = nullptr; + if (!FuncInfo.UnwindMap.empty()) + UnwindMapXData = Asm->OutContext.GetOrCreateSymbol( + Twine("$stateUnwindMap$", ParentLinkageName)); + if (!FuncInfo.TryBlockMap.empty()) + TryBlockMapXData = Asm->OutContext.GetOrCreateSymbol( + Twine("$tryMap$", ParentLinkageName)); + if (!FuncInfo.IPToStateList.empty()) + IPToStateXData = Asm->OutContext.GetOrCreateSymbol( + Twine("$ip2state$", ParentLinkageName)); + + // FuncInfo { + // uint32_t MagicNumber + // int32_t MaxState; + // UnwindMapEntry *UnwindMap; + // uint32_t NumTryBlocks; + // TryBlockMapEntry *TryBlockMap; + // uint32_t IPMapEntries; + // IPToStateMapEntry *IPToStateMap; + // uint32_t UnwindHelp; // (x64/ARM only) + // ESTypeList *ESTypeList; + // int32_t EHFlags; + // } + // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. + // EHFlags & 2 -> ??? + // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
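// For orientation, a hypothetical instance of the header emitted just below,
// for a function 'foo' with a two-state unwind map, one try block and two
// ip2state entries (field order as in the code; '@IMGREL' is shorthand for
// an image-relative fixup):
//
//   $cppxdata$foo:
//     .long 0x19930522              // MagicNumber
//     .long 2                       // MaxState = UnwindMap.size()
//     .long $stateUnwindMap$foo@IMGREL
//     .long 1                       // NumTryBlocks
//     .long $tryMap$foo@IMGREL
//     .long 2                       // IPMapEntries
//     .long $ip2state$foo@IMGREL
//     .long <UnwindHelp offset>     // UnwindHelp (x64)
//     .long 0                       // ESTypeList (unused here)
//     .long 1                       // EHFlags: synchronous exceptions only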
+ OS.EmitLabel(FuncInfoXData); + OS.EmitIntValue(0x19930522, 4); // MagicNumber + OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState + OS.EmitValue(createImageRel32(UnwindMapXData), 4); // UnwindMap + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks + OS.EmitValue(createImageRel32(TryBlockMapXData), 4); // TryBlockMap + OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries + OS.EmitValue(createImageRel32(IPToStateXData), 4); // IPToStateMap + OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp + OS.EmitIntValue(0, 4); // ESTypeList + OS.EmitIntValue(1, 4); // EHFlags + + // UnwindMapEntry { + // int32_t ToState; + // void (*Action)(); + // }; + if (UnwindMapXData) { + OS.EmitLabel(UnwindMapXData); + for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { + OS.EmitIntValue(UME.ToState, 4); // ToState + OS.EmitValue(createImageRel32(UME.Cleanup), 4); // Action + } + } + + // TryBlockMap { + // int32_t TryLow; + // int32_t TryHigh; + // int32_t CatchHigh; + // int32_t NumCatches; + // HandlerType *HandlerArray; + // }; + if (TryBlockMapXData) { + OS.EmitLabel(TryBlockMapXData); + SmallVector<MCSymbol *, 1> HandlerMaps; + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; + + if (!TBME.HandlerArray.empty()) + HandlerMapXData = + Asm->OutContext.GetOrCreateSymbol(Twine("$handlerMap$") + .concat(Twine(I)) + .concat("$") + .concat(ParentLinkageName)); + + HandlerMaps.push_back(HandlerMapXData); + + int CatchHigh = -1; + for (WinEHHandlerType &HT : TBME.HandlerArray) + CatchHigh = + std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); + + assert(TBME.TryLow <= TBME.TryHigh); + assert(CatchHigh > TBME.TryHigh); + OS.EmitIntValue(TBME.TryLow, 4); // TryLow + OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh + OS.EmitIntValue(CatchHigh, 4); // CatchHigh + OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches + OS.EmitValue(createImageRel32(HandlerMapXData), 4); // HandlerArray + } + + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = HandlerMaps[I]; + if (!HandlerMapXData) + continue; + // HandlerType { + // int32_t Adjectives; + // TypeDescriptor *Type; + // int32_t CatchObjOffset; + // void (*Handler)(); + // int32_t ParentFrameOffset; // x64 only + // }; + OS.EmitLabel(HandlerMapXData); + for (const WinEHHandlerType &HT : TBME.HandlerArray) { + MCSymbol *ParentFrameOffset = + Asm->OutContext.getOrCreateParentFrameOffsetSymbol( + GlobalValue::getRealLinkageName(HT.Handler->getName())); + const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::Create( + ParentFrameOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + + // Get the frame escape label with the offset of the catch object. If + // the index is -1, then there is no catch object, and we should emit an + // offset of zero, indicating that no copy will occur. 
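// Background for the catch-object recovery just below: the catch object
// lives in the parent's frame, and the outlined handler finds it through a
// frame-escape index. In IR of this vintage the pairing looks roughly like
// the following (a hedged sketch, not lifted from the patch; these
// intrinsics were later renamed localescape/localrecover):
//
//   ; parent publishes the catch object slot as escape index 0
//   call void (...) @llvm.frameescape(i32* %catch.obj)
//   ; outlined handler recovers it from the parent's frame pointer
//   %obj = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*),
//                                      i8* %parent.fp, i32 0)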
+ const MCExpr *FrameAllocOffsetRef = nullptr; + if (HT.CatchObjRecoverIdx >= 0) { + MCSymbol *FrameAllocOffset = + Asm->OutContext.getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(ParentF->getName()), + HT.CatchObjRecoverIdx); + FrameAllocOffsetRef = MCSymbolRefExpr::Create( + FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + } else { + FrameAllocOffsetRef = MCConstantExpr::Create(0, Asm->OutContext); + } + + OS.EmitIntValue(HT.Adjectives, 4); // Adjectives + OS.EmitValue(createImageRel32(HT.TypeDescriptor), 4); // Type + OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset + OS.EmitValue(createImageRel32(HT.Handler), 4); // Handler + OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + } + } + } + + // IPToStateMapEntry { + // void *IP; + // int32_t State; + // }; + if (IPToStateXData) { + OS.EmitLabel(IPToStateXData); + for (auto &IPStatePair : FuncInfo.IPToStateList) { + OS.EmitValue(createImageRel32(IPStatePair.first), 4); // IP + OS.EmitIntValue(IPStatePair.second, 4); // State + } + } +} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h index b2d5d1b..5f4237f 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.h +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -17,7 +17,9 @@ #include "EHStreamer.h" namespace llvm { +class GlobalValue; class MachineFunction; +class MCExpr; class Win64Exception : public EHStreamer { /// Per-function flag to indicate if personality info should be emitted. @@ -31,14 +33,17 @@ class Win64Exception : public EHStreamer { void emitCSpecificHandlerTable(); - const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); + void emitCXXFrameHandler3Table(const MachineFunction *MF); + + const MCExpr *createImageRel32(const MCSymbol *Value); + const MCExpr *createImageRel32(const GlobalValue *GV); public: //===--------------------------------------------------------------------===// // Main entry points. // Win64Exception(AsmPrinter *A); - virtual ~Win64Exception(); + ~Win64Exception() override; /// Emit all exception information that should come after the content. void endModule() override; diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index d2b4eec..276e7df 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -20,14 +20,13 @@ namespace llvm { StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { assert(S); - DIDescriptor D(S); - assert((D.isCompileUnit() || D.isFile() || D.isSubprogram() || - D.isLexicalBlockFile() || D.isLexicalBlock()) && + assert((isa<MDCompileUnit>(S) || isa<MDFile>(S) || isa<MDSubprogram>(S) || + isa<MDLexicalBlockBase>(S)) && "Unexpected scope info"); - DIScope Scope(S); - StringRef Dir = Scope.getDirectory(), - Filename = Scope.getFilename(); + auto *Scope = cast<MDScope>(S); + StringRef Dir = Scope->getDirectory(), + Filename = Scope->getFilename(); char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; if (Result) return Result; @@ -40,7 +39,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { if (Filename.find(':') == 1) Filepath = Filename; else - Filepath = (Dir + Twine("\\") + Filename).str(); + Filepath = (Dir + "\\" + Filename).str(); // Canonicalize the path. We have to do it textually because we may no longer // have access to the file in the filesystem.
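// Example of the textual canonicalization referred to above (illustrative):
// a recorded path like "c:\src\foo\..\bar\.\x.cpp" must fold to
// "c:\src\bar\x.cpp" by pure string rewriting, since the file may no longer
// exist on the machine where the tables are emitted.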
@@ -81,7 +80,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, const MachineFunction *MF) { - const MDNode *Scope = DL.getScope(MF->getFunction()->getContext()); + const MDNode *Scope = DL.getScope(); if (!Scope) return; StringRef Filename = getFullFilepath(Scope); @@ -193,7 +192,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { StringRef GVName = GV->getName(); StringRef FuncName; if (DISubprogram SP = getDISubprogram(GV)) - FuncName = SP.getDisplayName(); + FuncName = SP->getDisplayName(); // FIXME Clang currently sets DisplayName to "bar" for a C++ // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying @@ -330,7 +329,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { DebugLoc PrologEndLoc; bool EmptyPrologue = true; for (const auto &MBB : *MF) { - if (!PrologEndLoc.isUnknown()) + if (PrologEndLoc) break; for (const auto &MI : MBB) { if (MI.isDebugValue()) @@ -339,8 +338,7 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. // FIXME: do we need the first subcondition? - if (!MI.getFlag(MachineInstr::FrameSetup) && - (!MI.getDebugLoc().isUnknown())) { + if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; } @@ -348,9 +346,8 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { } } // Record beginning of function if we have a non-empty prologue. - if (!PrologEndLoc.isUnknown() && !EmptyPrologue) { - DebugLoc FnStartDL = - PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); + if (PrologEndLoc && !EmptyPrologue) { + DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); maybeRecordLocation(FnStartDL, MF); } } @@ -377,7 +374,7 @@ void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) return; DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || DL.isUnknown()) + if (DL == PrevInstLoc || !DL) return; maybeRecordLocation(DL, Asm->MF); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 8492eac..c66d141 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -114,7 +114,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() { + ~WinCodeViewLineTables() override { for (DirAndFilenameToFilepathMapTy::iterator I = DirAndFilenameToFilepathMap.begin(), E = DirAndFilenameToFilepathMap.end(); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 6c9d048..35376e1 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -693,11 +693,11 @@ static bool SinkCast(CastInst *CI) { InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", InsertPt); - MadeChange = true; } // Replace a use of the cast with a use of the new cast. 
TheUse = InsertedCast; + MadeChange = true; ++NumCastUses; } @@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ return SinkCast(CI); } -/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce +/// CombineUAddWithOverflow - try to combine CI into a call to the +/// llvm.uadd.with.overflow intrinsic if possible. +/// +/// Return true if any changes were made. +static bool CombineUAddWithOverflow(CmpInst *CI) { + Value *A, *B; + Instruction *AddI; + if (!match(CI, + m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) + return false; + + Type *Ty = AddI->getType(); + if (!isa<IntegerType>(Ty)) + return false; + + // We don't want to move around uses of condition values this late, so we + // check if it is legal to create the call to the intrinsic in the basic + // block containing the icmp: + + if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + return false; + +#ifndef NDEBUG + // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption + // for now: + if (AddI->hasOneUse()) + assert(*AddI->user_begin() == CI && "expected!"); +#endif + + Module *M = CI->getParent()->getParent()->getParent(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + + auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + + auto *UAddWithOverflow = + CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + auto *Overflow = + ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + + CI->replaceAllUsesWith(Overflow); + AddI->replaceAllUsesWith(UAdd); + CI->eraseFromParent(); + AddI->eraseFromParent(); + return true; +} + +/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce /// the number of virtual registers that must be created and coalesced. This is /// a clear win except on targets with multiple condition code registers /// (PowerPC), where it might lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool OptimizeCmpExpression(CmpInst *CI) { +static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); /// InsertedCmp - Only insert a cmp in each block once. @@ -787,21 +834,33 @@ static bool OptimizeCmpExpression(CmpInst *CI) { CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); - MadeChange = true; } // Replace a use of the cmp with a use of the new cmp. TheUse = InsertedCmp; + MadeChange = true; ++NumCmpUses; } // If we removed all uses, nuke the cmp. - if (CI->use_empty()) + if (CI->use_empty()) { CI->eraseFromParent(); + MadeChange = true; + } return MadeChange; } +static bool OptimizeCmpExpression(CmpInst *CI) { + if (SinkCmpExpression(CI)) + return true; + + if (CombineUAddWithOverflow(CI)) + return true; + + return false; +} + /// isExtractBitsCandidateUse - Check if the candidates could /// be combined with shift instruction, which includes: /// 1.
Truncate instruction @@ -1081,8 +1140,9 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); Builder.SetInsertPoint(InsertPt); - - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); LoadInst* Load = Builder.CreateLoad(Gep, false); VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); @@ -1176,7 +1236,8 @@ static void ScalarizeMaskedStore(CallInst *CI) { Builder.SetInsertPoint(InsertPt); Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value* Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); Builder.CreateStore(OneElt, Gep); // Create "else" block, fill it in the next iteration @@ -1227,13 +1288,25 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { cast<PointerType>(Arg->getType())->getAddressSpace()), 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; AllocaInst *AI; - if ((Offset2 & (PrefAlign-1)) == 0 && - (AI = dyn_cast<AllocaInst>(Val)) && + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) AI->setAlignment(PrefAlign); - // TODO: Also align GlobalVariables + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. + GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && + GV->hasUniqueInitializer() && + !GV->hasSection() && + GV->getAlignment() < PrefAlign && + TD->getTypeAllocSize( + GV->getType()->getElementType()) >= MinSize + Offset2) + GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -1841,7 +1914,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); } - ~InstructionRemover() { delete Replacer; } + ~InstructionRemover() override { delete Replacer; } /// \brief Really remove the instruction. void commit() override { delete Inst; } @@ -3233,7 +3306,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return false; } else { Type *I8PtrTy = - Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); // Start with the base register. Do this first so that subsequent address // matching finds it last, which will prevent it from trying to match it @@ -3285,7 +3359,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. 
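// The CreateGEP / CreateInBoundsGEP calls touched throughout this file now
// take the pointee type explicitly (part of the move away from inferring
// element types from pointer operands). Minimal before/after sketch with
// hypothetical values:
//
//   Value *P = ...;                      // an i8* in this function
//   Builder.CreateGEP(P, Idx);           // old: element type read from P
//   Builder.CreateGEP(I8Ty, P, Idx);     // new: element type passed in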
if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } ResultIndex = V; @@ -3296,7 +3370,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); - SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } if (SunkAddr->getType() != Addr->getType()) diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index ceef74d..af011a0 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -69,7 +69,7 @@ class TargetRegisterInfo; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); - ~CriticalAntiDepBreaker(); + ~CriticalAntiDepBreaker() override; /// Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB) override; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 8f74271..6cde4c2 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -797,9 +797,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { // if-conversion in a single pass. The tryConvertIf() function may erase // blocks, but only blocks dominated by the head block. This makes it safe to // update the dominator tree while the post-order iterator is still active. - for (po_iterator<MachineDominatorTree*> - I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) - if (tryConvertIf(I->getBlock())) + for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) Changed = true; return Changed; diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index a2c5fce..16cd9e8 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -99,10 +99,6 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - case GC::Loop: - return "loop"; - case GC::Return: - return "return"; case GC::PreCall: return "pre-call"; case GC::PostCall: diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index 9d38e4c..ac35165 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -332,19 +332,22 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { return false; FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); - if (!FI->getStrategy().needsSafePoints()) - return false; - MMI = &getAnalysis<MachineModuleInfo>(); TII = MF.getSubtarget().getInstrInfo(); - // Find the size of the stack frame. - FI->setFrameSize(MF.getFrameInfo()->getStackSize()); + // Find the size of the stack frame. There may be no correct static frame + // size, we use UINT64_MAX to represent this. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(MF); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); // Find all safe points. - FindSafePoints(MF); + if (FI->getStrategy().needsSafePoints()) + FindSafePoints(MF); - // Find the stack offsets for all roots. 
+ // Find the concrete stack offsets for all roots (stack slots) FindStackOffsets(MF); return false; diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 4188e5d..153ba1a 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -73,9 +73,10 @@ using namespace llvm; #define DEBUG_TYPE "global-merge" +// FIXME: This is only useful as a last-resort way to disable the pass. cl::opt<bool> EnableGlobalMerge("enable-global-merge", cl::Hidden, - cl::desc("Enable global merge pass"), + cl::desc("Enable the global merge pass"), cl::init(true)); static cl::opt<bool> @@ -222,7 +223,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); + Constant *GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index f0d407f..c7e7e58 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -135,7 +135,7 @@ private: // Dead defs generated during spilling. SmallVector<MachineInstr*, 8> DeadDefs; - ~InlineSpiller() {} + ~InlineSpiller() override {} public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) @@ -1232,6 +1232,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) .addFrameIndex(StackSlot) .addImm(Offset) diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 0fb0c46..61d68f6 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -140,12 +140,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -175,7 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, switch (FileType) { case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( - MAI.getAssemblerDialect(), MAI, MII, MRI, STI); + Triple(getTargetTriple()), MAI.getAssemblerDialect(), MAI, MII, MRI); // Create a code emitter if asked to show the encoding. 
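// The EarlyIfConverter hunk earlier in this patch is a representative
// cleanup: explicit po_iterator begin/end pairs become the post_order()
// range adaptor. The same shape works for any type with a GraphTraits
// specialization; a sketch ('visit' is a hypothetical callback):
//
//   #include "llvm/ADT/PostOrderIterator.h"
//   for (auto *DomNode : post_order(DomTree))
//     visit(DomNode->getBlock());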
MCCodeEmitter *MCE = nullptr; @@ -184,8 +181,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); + auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, Out, Options.MCOptions.AsmVerbose, + *Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -229,9 +227,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// code is not supported. It fills the MCContext Ctx pointer which can be /// used to build custom MCStreamer. /// -bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, - MCContext *&Ctx, - raw_ostream &Out, +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 9eaf7da..d6998d6 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -59,11 +59,11 @@ void LexicalScopes::extractLexicalScopes( for (const auto &MBB : *MF) { const MachineInstr *RangeBeginMI = nullptr; const MachineInstr *PrevMI = nullptr; - DebugLoc PrevDL; + const MDLocation *PrevDL = nullptr; for (const auto &MInsn : MBB) { // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn.getDebugLoc(); - if (MIDL.isUnknown()) { + const MDLocation *MIDL = MInsn.getDebugLoc(); + if (!MIDL) { PrevMI = &MInsn; continue; } @@ -96,7 +96,7 @@ void LexicalScopes::extractLexicalScopes( } // Create last instruction range. - if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) { + if (RangeBeginMI && PrevMI && PrevDL) { InsnRange R(RangeBeginMI, PrevMI); MIRanges.push_back(R); MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); @@ -106,20 +106,17 @@ void LexicalScopes::extractLexicalScopes( /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. -LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); +LexicalScope *LexicalScopes::findLexicalScope(const MDLocation *DL) { + MDLocalScope *Scope = DL->getScope(); if (!Scope) return nullptr; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope)) + Scope = File->getScope(); - if (IA) { + if (auto *IA = DL->getInlinedAt()) { auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; } @@ -128,46 +125,39 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. 
-LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - if (DL.isUnknown()) - return nullptr; - MDNode *Scope = nullptr; - MDNode *InlinedAt = nullptr; - DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); - - if (InlinedAt) { +LexicalScope *LexicalScopes::getOrCreateLexicalScope(const MDLocalScope *Scope, + const MDLocation *IA) { + if (IA) { // Create an abstract scope for inlined function. getOrCreateAbstractScope(Scope); // Create an inlined scope for inlined function. - return getOrCreateInlinedScope(Scope, InlinedAt); + return getOrCreateInlinedScope(Scope, IA); } return getOrCreateRegularScope(Scope); } /// getOrCreateRegularScope - Find or create a regular lexical scope. -LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { - DIDescriptor D = DIDescriptor(Scope); - if (D.isLexicalBlockFile()) { - Scope = DILexicalBlockFile(Scope).getScope(); - D = DIDescriptor(Scope); - } +LexicalScope * +LexicalScopes::getOrCreateRegularScope(const MDLocalScope *Scope) { + if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = LexicalScopeMap.find(Scope); if (I != LexicalScopeMap.end()) return &I->second; + // FIXME: Should the following dyn_cast be MDLexicalBlock? LexicalScope *Parent = nullptr; - if (D.isLexicalBlock()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); + if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope)) + Parent = getOrCreateLexicalScope(Block->getScope()); I = LexicalScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), - std::forward_as_tuple(Parent, DIDescriptor(Scope), - nullptr, false)).first; + std::forward_as_tuple(Parent, Scope, nullptr, + false)).first; if (!Parent) { - assert(DIDescriptor(Scope).isSubprogram()); - assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(cast<MDSubprogram>(Scope)->describes(MF->getFunction())); assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; } @@ -176,19 +166,19 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. -LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, - MDNode *InlinedAt) { - std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt); +LexicalScope * +LexicalScopes::getOrCreateInlinedScope(const MDLocalScope *Scope, + const MDLocation *InlinedAt) { + std::pair<const MDLocalScope *, const MDLocation *> P(Scope, InlinedAt); auto I = InlinedLexicalScopeMap.find(P); if (I != InlinedLexicalScopeMap.end()) return &I->second; LexicalScope *Parent; - DILexicalBlock Scope(ScopeNode); - if (Scope.isSubprogram()) - Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt)); + if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope)) + Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt); else - Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); + Parent = getOrCreateLexicalScope(InlinedAt); I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(P), @@ -199,27 +189,26 @@ LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, } /// getOrCreateAbstractScope - Find or create an abstract lexical scope. 
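// Worked example for the inlined-scope recursion above: a location in
// lexical block B of f(), where f() was inlined into g() at call site L,
// resolves as
//
//   getOrCreateInlinedScope(B, L)          // B is a block: recurse on its
//     -> getOrCreateInlinedScope(f, L)     //   parent with the same L
//       -> getOrCreateLexicalScope(L)      // f is a subprogram: past the
//                                          //   inlined body, L's own scope
//                                          //   chain in g() takes over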
-LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { - assert(N && "Invalid Scope encoding!"); +LexicalScope * +LexicalScopes::getOrCreateAbstractScope(const MDLocalScope *Scope) { + assert(Scope && "Invalid Scope encoding!"); - DIDescriptor Scope(N); - if (Scope.isLexicalBlockFile()) - Scope = DILexicalBlockFile(Scope).getScope(); + if (auto *File = dyn_cast<MDLexicalBlockFile>(Scope)) + Scope = File->getScope(); auto I = AbstractScopeMap.find(Scope); if (I != AbstractScopeMap.end()) return &I->second; + // FIXME: Should the following isa be MDLexicalBlock? LexicalScope *Parent = nullptr; - if (Scope.isLexicalBlock()) { - DILexicalBlock DB(Scope); - DIDescriptor ParentDesc = DB.getContext(); - Parent = getOrCreateAbstractScope(ParentDesc); - } + if (auto *Block = dyn_cast<MDLexicalBlockBase>(Scope)) + Parent = getOrCreateAbstractScope(Block->getScope()); + I = AbstractScopeMap.emplace(std::piecewise_construct, std::forward_as_tuple(Scope), std::forward_as_tuple(Parent, Scope, nullptr, true)).first; - if (Scope.isSubprogram()) + if (isa<MDSubprogram>(Scope)) AbstractScopesList.push_back(&I->second); return &I->second; } @@ -280,7 +269,7 @@ void LexicalScopes::assignInstructionRanges( /// have machine instructions that belong to lexical scope identified by /// DebugLoc. void LexicalScopes::getMachineBasicBlocks( - DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { + const MDLocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) @@ -303,7 +292,7 @@ void LexicalScopes::getMachineBasicBlocks( /// dominates - Return true if DebugLoc's lexical scope dominates at least one /// machine instruction's lexical scope in a given machine basic block. -bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { +bool LexicalScopes::dominates(const MDLocation *DL, MachineBasicBlock *MBB) { LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) return false; @@ -315,12 +304,10 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { bool Result = false; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - DebugLoc IDL = I->getDebugLoc(); - if (IDL.isUnknown()) - continue; - if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) - if (Scope->dominates(IScope)) - return true; + if (const MDLocation *IDL = I->getDebugLoc()) + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; } return Result; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index e3791be..c2993db 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -158,10 +158,10 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset, - bool indirect) const { - return Var == Variable && Expr == Expression && Offset == offset && - indirect == IsIndirect; + bool match(const MDNode *Var, const MDNode *Expr, const MDLocation *IA, + unsigned Offset, bool indirect) const { + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && + Offset == offset && indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -269,12 +269,6 @@ public: void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); - /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. 
A - /// variable may have more than one corresponding DBG_VALUE instructions. - /// Only first one needs DebugLoc to identify variable's lexical scope - /// in source file. - DebugLoc findDebugLoc(); - /// getDebugLoc - Return DebugLoc of this UserValue. DebugLoc getDebugLoc() { return dl;} void print(raw_ostream &, const TargetRegisterInfo *); @@ -363,10 +357,47 @@ public: }; } // namespace +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL) + return; + + auto *Scope = cast<MDScope>(DL.getScope()); + // Omit the directory, because it's likely to be long and uninteresting. + CommentOS << Scope->getFilename(); + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + + DebugLoc InlinedAtDL = DL.getInlinedAt(); + if (!InlinedAtDL) + return; + + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; +} + +static void printExtendedName(raw_ostream &OS, const MDLocalVariable *V, + const MDLocation *DL) { + const LLVMContext &Ctx = V->getContext(); + StringRef Res = V->getName(); + if (!Res.empty()) + OS << Res << "," << V->getLine(); + if (auto *InlinedAt = DL->getInlinedAt()) { + if (DebugLoc InlinedAtDL = InlinedAt) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { - DIVariable DV(Variable); + DIVariable DV = cast<MDLocalVariable>(Variable); OS << "!\""; - DV.printExtendedName(OS); + printExtendedName(OS, DV, dl); + OS << "\"\t"; if (offset) OS << '+' << offset; @@ -433,7 +464,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, Offset, IsIndirect)) + if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect)) return UV; } @@ -942,11 +973,6 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, std::next(MachineBasicBlock::iterator(MI)); } -DebugLoc UserValue::findDebugLoc() { - DebugLoc D = dl; - dl = DebugLoc(); - return D; -} void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, @@ -955,11 +981,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; + assert(cast<MDLocalVariable>(Variable) + ->isValidLocationForIntrinsic(getDebugLoc()) && + "Expected inlined-at fields to agree"); if (Loc.isReg()) - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), IsIndirect, Loc.getReg(), offset, Variable, Expression); else - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addOperand(Loc) .addImm(offset) .addMetadata(Variable) diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 9748329..fe296bc 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -38,7 +38,7 @@ public: static char ID; // Pass identification, replacement for typeid LiveDebugVariables(); - ~LiveDebugVariables(); + ~LiveDebugVariables() override; /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. /// @param OldReg Old virtual register that is going away. 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 2afd7fa..d75e441 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -816,23 +816,45 @@ static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR, static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) { SmallPtrSet<const MachineBasicBlock*, 5> Visited; - for (LiveRange::Segment &S : LI.segments) { - if (S.valno != nullptr) - continue; - // This can only happen at the begin of a basic block. - assert(S.start.isBlock() && "valno should only be missing at block begin"); - - Visited.clear(); - const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start); - for (const MachineBasicBlock *Pred : MBB->predecessors()) { - VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited); - if (VNI != nullptr) { - S.valno = VNI; - break; + + LiveRange::iterator OutIt; + VNInfo *PrevValNo = nullptr; + for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &S = *I; + // Determine final VNI if necessary. + if (S.valno == nullptr) { + // This can only happen at the begin of a basic block. + assert(S.start.isBlock() && "valno should only be missing at block begin"); + + Visited.clear(); + const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start); + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited); + if (VNI != nullptr) { + S.valno = VNI; + break; + } } + assert(S.valno != nullptr && "could not determine valno"); + } + // Merge with previous segment if it has the same VNI. + if (PrevValNo == S.valno && OutIt->end == S.start) { + OutIt->end = S.end; + } else { + // Didn't merge. Move OutIt to next segment. + if (PrevValNo == nullptr) + OutIt = LI.begin(); + else + ++OutIt; + + if (OutIt != I) + *OutIt = *I; + PrevValNo = S.valno; } - assert(S.valno != nullptr && "could not determine valno"); } + // If we merged some segments chop off the end. + ++OutIt; + LI.segments.erase(OutIt, LI.end()); } void LiveInterval::constructMainRangeFromSubranges( @@ -955,6 +977,12 @@ void LiveInterval::constructMainRangeFromSubranges( NeedVNIFixup = true; } + // In rare cases we can produce adjacent segments with the same value + // number (if they come from different subranges, but happen to have + // the same defining instruction). VNIFixup will fix those cases. 
+ if (!empty() && segments.back().end == Pos && + segments.back().valno == VNI) + NeedVNIFixup = true; CurrentSegment.start = Pos; CurrentSegment.valno = VNI; ConstructingSegment = true; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 98359b1..34131bb 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -250,7 +250,7 @@ std::string MachineBasicBlock::getFullName() const { if (getBasicBlock()) Name += getBasicBlock()->getName(); else - Name += (Twine("BB") + Twine(getNumber())).str(); + Name += ("BB" + Twine(getNumber())).str(); return Name; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index ecc50c9..2969bad 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -661,7 +661,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, for (MachineBasicBlock *MBB : L.getBlocks()) { BlockChain &Chain = *BlockToChain[MBB]; // Ensure that this block is at the end of a chain; otherwise it could be - // mid-way through an inner loop or a successor of an analyzable branch. + // mid-way through an inner loop or a successor of an unanalyzable branch. if (MBB != *std::prev(Chain.end())) continue; @@ -715,7 +715,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, // a frequency higher than the current exit before we consider breaking // the layout. BranchProbability Bias(100 - ExitBlockBias, 100); - if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth || ExitEdgeFreq > BestExitEdgeFreq || (MBB->isLayoutSuccessor(Succ) && !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { @@ -724,8 +724,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, } } - // Restore the old exiting state, no viable looping successor was found. if (!HasLoopingSucc) { + // Restore the old exiting state, no viable looping successor was found. ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 6ceace8..448531f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -380,7 +380,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getName().str() + "' function"; + return ("CFG for '" + F->getName() + "' function").str(); } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -468,7 +468,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } /// getPICBaseSymbol - Return a function-local symbol to represent the PIC diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 1240efb..d154110 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -881,8 +881,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, } // If DebugLoc does not match then two dbg.values are not identical. 
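// (DebugLoc now converts to bool, with null meaning "no location", so the
// explicit isUnknown() checks in this file reduce to plain truth tests.)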
if (isDebugValue()) - if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown() - && getDebugLoc() != Other->getDebugLoc()) + if (getDebugLoc() && Other->getDebugLoc() && + getDebugLoc() != Other->getDebugLoc()) return false; return true; } @@ -1619,12 +1619,9 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { } if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. - const MDNode *MD = MO.getMetadata(); - DIDescriptor DI(MD); - DIVariable DIV(MD); - - if (DI.isVariable() && !DIV.getName().empty()) - OS << "!\"" << DIV.getName() << '\"'; + DIVariable DIV = dyn_cast<MDLocalVariable>(MO.getMetadata()); + if (DIV && !DIV->getName().empty()) + OS << "!\"" << DIV->getName() << '\"'; else MO.print(OS, TRI); } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { @@ -1711,13 +1708,13 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { } // Print debug location information. - if (isDebugValue() && getOperand(e - 1).isMetadata()) { + if (isDebugValue() && getOperand(e - 2).isMetadata()) { if (!HaveSemi) OS << ";"; - DIVariable DV(getOperand(e - 1).getMetadata()); - OS << " line no:" << DV.getLineNumber(); - if (MDNode *InlinedAt = DV.getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown() && MF) { + DIVariable DV = cast<MDLocalVariable>(getOperand(e - 2).getMetadata()); + OS << " line no:" << DV->getLine(); + if (auto *InlinedAt = debugLoc->getInlinedAt()) { + DebugLoc InlinedAtDL(InlinedAt); + if (InlinedAtDL && MF) { OS << " inlined @[ "; InlinedAtDL.print(OS); OS << " ]"; @@ -1725,7 +1722,7 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { } if (isIndirectDebugValue()) OS << " indirect"; - } else if (!debugLoc.isUnknown() && MF) { + } else if (debugLoc && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; debugLoc.print(OS); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 2f65a2e..9756f13 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -10,10 +10,6 @@ // This pass performs loop invariant code motion on machine instructions. We // attempt to remove as much code from the body of a loop as possible. // -// This pass does not attempt to throttle itself to limit register pressure. -// The register allocation phases are expected to perform rematerialization -// to recover when register pressure is high. -// // This pass is not intended to be a replacement or a complete alternative // for the LLVM-IR-level LICM pass. It is only designed to hoist simple // constructs that are not exposed before lowering and instruction selection. @@ -104,7 +100,7 @@ namespace { SmallSet<unsigned, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; - // Register pressure "limit" per register class. If the pressure + // Register pressure "limit" per register pressure set. If the pressure // is higher than the limit, then it's considered high. SmallVector<unsigned, 8> RegLimit; @@ -214,7 +210,8 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. 
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); + bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, + bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -254,21 +251,25 @@ namespace { /// if there is little to no overhead moving instructions into loops. void SinkIntoLoop(); - /// getRegisterClassIDAndCost - For a given MI, register, and the operand - /// index, return the ID and cost of its representative register class by - /// reference. - void getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const; - /// InitRegPressure - Find all virtual register references that are liveout /// of the preheader to initialize the starting "register pressure". Note /// this does not count live through (livein but not used) registers. void InitRegPressure(MachineBasicBlock *BB); + /// calcRegisterCost - Calculate the additional register pressure that the + /// registers used in MI cause. + /// + /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to + /// figure out which usages are live-ins. + /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. + DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); + /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. - void UpdateRegPressure(const MachineInstr *MI); + void UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef = false); /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable @@ -354,13 +355,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (PreRegAlloc) { // Estimate register pressure during pre-regalloc pass. - unsigned NumRC = TRI->getNumRegClasses(); - RegPressure.resize(NumRC); + unsigned NumRPS = TRI->getNumRegPressureSets(); + RegPressure.resize(NumRPS); std::fill(RegPressure.begin(), RegPressure.end(), 0); - RegLimit.resize(NumRC); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); + RegLimit.resize(NumRPS); + for (unsigned i = 0, e = NumRPS; i != e; ++i) + RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); } // Get our Loop information... @@ -836,23 +836,6 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } -/// getRegisterClassIDAndCost - For a given MI, register, and the operand -/// index, return the ID and cost of its representative register class. -void -MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - unsigned &RCId, unsigned &RCCost) const { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - MVT VT = *RC->vt_begin(); - if (VT == MVT::Untyped) { - RCId = RC->getID(); - RCCost = 1; - } else { - RCId = TLI->getRepRegClassFor(VT)->getID(); - RCCost = TLI->getRepRegClassCostFor(VT); - } -} - /// InitRegPressure - Find all virtual register references that are liveout of /// the preheader to initialize the starting "register pressure". Note this /// does not count live through (livein but not used) registers. 
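MachineLICM's pressure tracking above moves from per-register-class weights to the target's register pressure sets. In outline, each virtual-register operand contributes a signed weight to every pressure set of its class; a minimal sketch of that accumulation, assuming Reg, MO, MRI, TRI and the Cost map as they appear in the hunks that follow:

// Sketch: fold one operand's weight into the per-pressure-set cost map.
// Defs add weight, kills subtract it; the real code additionally consults
// RegSeen and ConsiderUnseenAsDef to classify live-ins.
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
int RCCost = MO.isDef() ? TRI->getRegClassWeight(RC).RegWeight
                        : -TRI->getRegClassWeight(RC).RegWeight;
for (const int *PS = TRI->getRegClassPressureSets(RC); *PS != -1; ++PS)
  Cost[*PS] += RCCost;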
@@ -870,41 +853,30 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { InitRegPressure(*BB->pred_begin()); } - for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); - MII != E; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - bool isNew = RegSeen.insert(Reg).second; - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) - RegPressure[RCId] += RCCost; - else { - bool isKill = isOperandKill(MO, MRI); - if (isNew && !isKill) - // Haven't seen this, it must be a livein. - RegPressure[RCId] += RCCost; - else if (!isNew && isKill) - RegPressure[RCId] -= RCCost; - } - } - } + for (const MachineInstr &MI : *BB) + UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } /// UpdateRegPressure - Update estimate of register pressure after the /// specified instruction. -void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; +void MachineLICM::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); + for (const auto &RPIdAndCost : Cost) { + unsigned Class = RPIdAndCost.first; + if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) + RegPressure[Class] = 0; + else + RegPressure[Class] += RPIdAndCost.second; + } +} - SmallVector<unsigned, 4> Defs; +DenseMap<unsigned, int> +MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { + DenseMap<unsigned, int> Cost; + if (MI->isImplicitDef()) + return Cost; for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -913,27 +885,33 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg).second; + // FIXME: It seems bad to use RegSeen only for some of these calculations. + bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + + RegClassWeight W = TRI->getRegClassWeight(RC); + int RCCost = 0; if (MO.isDef()) - Defs.push_back(Reg); - else if (!isNew && isOperandKill(MO, MRI)) { - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (RCCost > RegPressure[RCId]) - RegPressure[RCId] = 0; + RCCost = W.RegWeight; + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill && ConsiderUnseenAsDef) + // Haven't seen this, it must be a livein. 
+ RCCost = W.RegWeight; + else if (!isNew && isKill) + RCCost = -W.RegWeight; + } + if (RCCost == 0) + continue; + const int *PS = TRI->getRegClassPressureSets(RC); + for (; *PS != -1; ++PS) { + if (Cost.find(*PS) == Cost.end()) + Cost[*PS] = RCCost; else - RegPressure[RCId] -= RCCost; + Cost[*PS] += RCCost; } } - - unsigned Idx = 0; - while (!Defs.empty()) { - unsigned Reg = Defs.pop_back_val(); - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost); - RegPressure[RCId] += RCCost; - ++Idx; - } + return Cost; } /// isLoadFromGOTOrConstantPool - Return true if this machine instruction @@ -1125,27 +1103,23 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, +bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - if (CI->second <= 0) + for (const auto &RPIdAndCost : Cost) { + if (RPIdAndCost.second <= 0) continue; - unsigned RCId = CI->first; - unsigned Limit = RegLimit[RCId]; - int Cost = CI->second; + unsigned Class = RPIdAndCost.first; + int Limit = RegLimit[Class]; // Don't hoist cheap instructions if they would increase register pressure, // even if we're under the limit. if (CheapInstr && !HoistCheapInsts) return true; - for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i-1]; - if (RP[RCId] + Cost >= Limit) + for (const auto &RP : BackTrace) + if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) return true; - } } return false; @@ -1155,46 +1129,15 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, /// current block and update their register pressures to reflect the effect /// of hoisting MI from the current block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { - if (MI->isImplicitDef()) - return; - // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. - DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); - } - } + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); // Update register pressure of blocks from loop header to current block. 
- for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVectorImpl<unsigned> &RP = BackTrace[i]; - for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); - CI != CE; ++CI) { - unsigned RCId = CI->first; - RP[RCId] += CI->second; - } - } + for (auto &RP : BackTrace) + for (const auto &RPIdAndCost : Cost) + RP[RPIdAndCost.first] += RPIdAndCost.second; } /// IsProfitableToHoist - Return true if it is potentially profitable to hoist @@ -1229,15 +1172,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (TII->isTriviallyReMaterializable(&MI, AA)) return true; - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // Cheap instructions will only be hoisted if they don't increase register - // pressure at all. // FIXME: If there are long latency loop-invariant instructions inside the // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) @@ -1245,24 +1181,22 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - DEBUG(dbgs() << "Hoist High Latency: " << MI); - ++NumHighLatency; - return true; - } - Cost[RCId] += RCCost; - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - Cost[RCId] -= RCCost; + if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } } + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); + // Visit BBs from header to current BB, if hoisting this doesn't cause // high register pressure, then it's safe to proceed. 
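// (High-latency defs are now hoisted before any pressure cost is computed;
// calcRegisterCost runs only for the remaining candidates.)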
if (!CanCauseHighRegPressure(Cost, CheapInstr)) { diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index fca7df0..e8bd1f8 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -425,6 +426,12 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, Personalities.push_back(Personality); } +void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, + int State) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.WinEHState = State; +} + /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. /// void MachineModuleInfo:: @@ -563,10 +570,13 @@ const Function *MachineModuleInfo::getPersonality() const { } EHPersonality MachineModuleInfo::getPersonalityType() { - if (PersonalityTypeCache == EHPersonality::Unknown) - PersonalityTypeCache = classifyEHPersonality(getPersonality()); + if (PersonalityTypeCache == EHPersonality::Unknown) { + if (const Function *F = getPersonality()) + PersonalityTypeCache = classifyEHPersonality(F); + } return PersonalityTypeCache; } + /// getPersonalityIndex - Return unique index for current personality /// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { @@ -588,3 +598,18 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { // in the zero index. return 0; } + +const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { + StringRef WinEHParentName = + F->getFnAttribute("wineh-parent").getValueAsString(); + if (WinEHParentName.empty() || WinEHParentName == F->getName()) + return F; + return F->getParent()->getFunction(WinEHParentName); +} + +WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { + auto &Ptr = FuncInfoMap[getWinEHParent(F)]; + if (!Ptr) + Ptr.reset(new WinEHFuncInfo); + return *Ptr; +} diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index a1c7e9f..22d519e 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -31,15 +31,14 @@ static int SortSymbolPair(const void *LHS, const void *RHS) { return LHSS->getName().compare(RHSS->getName()); } -/// GetSortedStubs - Return the entries from a DenseMap in a deterministic -/// sorted orer. 
-MachineModuleInfoImpl::SymbolListTy -MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, - MachineModuleInfoImpl::StubValueTy>&Map) { +MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( + DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) { MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + + Map.clear(); return List; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 7a3c80b..a52d05f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1036,8 +1036,6 @@ void ScheduleDAGMILive::schedule() { scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - if (DFSResult) { unsigned SubtreeID = DFSResult->getSubtreeID(SU); if (!ScheduledTrees.test(SubtreeID)) { @@ -1049,6 +1047,8 @@ void ScheduleDAGMILive::schedule() { // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 8aacd1f..5dc7c21 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -463,13 +463,11 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { // Run an upwards post-order search for the trace start. Bounds.Downward = false; Bounds.Visited.clear(); - typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; - for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : inverse_post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the predecessors have been visited, pick the preferred one. - TBI.Pred = pickTracePred(*I); + TBI.Pred = pickTracePred(I); DEBUG({ if (TBI.Pred) dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; @@ -477,19 +475,17 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leading to I is now known, compute the depth resources. - computeDepthResources(*I); + computeDepthResources(I); } // Run a downwards post-order search for the trace end. Bounds.Downward = true; Bounds.Visited.clear(); - typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; - for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); - I != E; ++I) { + for (auto I : post_order_ext(MBB, Bounds)) { DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; // All the successors have been visited, pick the preferred one. - TBI.Succ = pickTraceSucc(*I); + TBI.Succ = pickTraceSucc(I); DEBUG({ if (TBI.Succ) dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; @@ -497,7 +493,7 @@ void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { dbgs() << "null\n"; }); // The trace leaving I is now known, compute the height resources. 
- computeHeightResources(*I); + computeHeightResources(I); } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index ad59fc9..55f08e4 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -141,7 +141,7 @@ namespace { TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); - ~SchedulePostRATDList(); + ~SchedulePostRATDList() override; /// startBlock - Initialize register live-range state for scheduling in /// this block. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e073e6a..5334a63 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" @@ -752,6 +753,25 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; + MachineModuleInfo &MMI = Fn.getMMI(); + const Function *F = Fn.getFunction(); + const Function *ParentF = MMI.getWinEHParent(F); + unsigned FrameReg; + if (F == ParentF) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + // FIXME: This should be unconditional but we have bugs in the preparation + // pass. + if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) + FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( + Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); + } else if (MMI.hasWinEHFuncInfo(F)) { + WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); + auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); + if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) + FuncInfo.CatchHandlerParentFrameObjOffset[F] = + TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); + } + // Store SPAdj at exit of a basic block. SmallVector<int, 8> SPState; SPState.resize(Fn.getNumBlockIDs()); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c621414..c311c7b 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -301,13 +301,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; - DebugLoc DL; - if (MI == MBB->end()) { - // If MI is at basic block end then use last instruction's location. 
- MachineBasicBlock::iterator EI = MI; - DL = (--EI)->getDebugLoc(); - } else - DL = MI->getDebugLoc(); + DebugLoc DL = DBG->getDebugLoc(); + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(FI) @@ -877,6 +873,9 @@ void RAFast::AllocateBasicBlock() { const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); + assert( + cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SS) diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index e94f1bb..26f42c9 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -538,8 +538,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -552,10 +553,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Allocating bottom up may allow many short LRGs to be assigned first // to one of the cheap registers. This could be much faster for very // large blocks on targets with many physical registers. - Prio = Indexes->getZeroIndex().getInstrDistance(LI->beginIndex()); + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - } - else { + Prio |= RC.AllocationPriority << 24; + } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9e3cf41..5d958a6 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -2731,15 +2731,19 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { assert(Copy.isCopyLike()); if (!UseTerminalRule) return false; + unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); // Check if the destination of this copy has any other affinity. - unsigned DstReg = Copy.getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + // If SrcReg is a physical register, the copy won't be coalesced. + // Ignoring it may have other side effect (like missing + // rematerialization). So keep it. + TargetRegisterInfo::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) return false; // DstReg is a terminal node. Check if it inteferes with any other // copy involving SrcReg. 
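// (Resolving the operands through isMoveInstr above also covers subregister
// copies, and copies whose source is a physical register are deliberately
// exempted from the terminal rule so rematerialization is not lost.)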
- unsigned SrcReg = Copy.getOperand(1).getReg(); const MachineBasicBlock *OrigBB = Copy.getParent(); const LiveInterval &DstLI = LIS->getInterval(DstReg); for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { @@ -2751,9 +2755,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { // For now, just consider the copies that are in the same block. if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) continue; - unsigned OtherReg = MI.getOperand(0).getReg(); + unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; + isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, + OtherSubReg); if (OtherReg == SrcReg) - OtherReg = MI.getOperand(1).getReg(); + OtherReg = OtherSrcReg; // Check if OtherReg is a non-terminal. if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || isTerminalReg(OtherReg, MI, MRI)) @@ -2775,25 +2781,45 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // yet, it might invalidate the iterator. const unsigned PrevSize = WorkList.size(); if (JoinGlobalCopies) { + SmallVector<MachineInstr*, 2> LocalTerminals; + SmallVector<MachineInstr*, 2> GlobalTerminals; // Coalesce copies bottom-up to coalesce local defs before local uses. They // are not inherently easier to resolve, but slightly preferable until we // have local live range splitting. In particular this is required by // cmp+jmp macro fusion. for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { - if (!MII->isCopyLike() || applyTerminalRule(*MII)) + if (!MII->isCopyLike()) continue; - if (isLocalCopy(&(*MII), LIS)) - LocalWorkList.push_back(&(*MII)); - else - WorkList.push_back(&(*MII)); + bool ApplyTerminalRule = applyTerminalRule(*MII); + if (isLocalCopy(&(*MII), LIS)) { + if (ApplyTerminalRule) + LocalTerminals.push_back(&(*MII)); + else + LocalWorkList.push_back(&(*MII)); + } else { + if (ApplyTerminalRule) + GlobalTerminals.push_back(&(*MII)); + else + WorkList.push_back(&(*MII)); + } } + // Append the copies evicted by the terminal rule at the end of the list. + LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); + WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); } else { + SmallVector<MachineInstr*, 2> Terminals; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) - if (MII->isCopyLike() && !applyTerminalRule(*MII)) - WorkList.push_back(MII); + if (MII->isCopyLike()) { + if (applyTerminalRule(*MII)) + Terminals.push_back(&(*MII)); + else + WorkList.push_back(MII); + } + // Append the copies evicted by the terminal rule at the end of the list. + WorkList.append(Terminals.begin(), Terminals.end()); } // Try coalescing the collected copies immediately, and remove the nulls. // This prevents the WorkList from getting too large since most copies are diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a1c84c5..22fd6d6 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -705,16 +705,8 @@ static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { EltVT.getSizeInBits() >= SplatBitSize); } -// \brief Returns the SDNode if it is a constant BuildVector or constant. 
-static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { - if (isa<ConstantSDNode>(N)) - return N.getNode(); - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if (BV && BV->isConstant()) - return BV; - return nullptr; -} - +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); @@ -723,6 +715,8 @@ static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { return nullptr; } +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { if (isa<ConstantFPSDNode>(N)) return N.getNode(); @@ -773,8 +767,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); @@ -793,8 +787,8 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); @@ -1583,8 +1577,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (add x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1604,7 +1598,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) @@ -1624,8 +1619,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0C->getAPIntValue(), VT), N0.getOperand(1)); // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode()) + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && @@ -1828,8 +1822,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sub x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N1.getNode())) @@ -1984,26 +1978,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { APInt ConstValue0, ConstValue1; // fold vector ops if (VT.isVector()) { - 
SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr; - ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue() - : APInt(); - N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; - ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() - : APInt(); + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); } // fold (mul c1, c2) -> c1*c2 if (N0IsConst && N1IsConst) return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); - // canonicalize constant to RHS - if (N0IsConst && !N1IsConst) + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2083,8 +2078,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode()) + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) return RMUL; return SDValue(); @@ -2096,10 +2090,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); @@ -2163,7 +2156,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } - // if integer divide is expensive and we satisfy the requirements, emit an + // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); @@ -2186,10 +2179,9 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { EVT VT = N->getValueType(0); // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); @@ -2459,8 +2451,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2489,8 +2481,8 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); - // If the type twice as wide is legal, transform the mulhu to a wider multiply - // plus a shift. + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. 
if (VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); @@ -2809,8 +2801,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -2839,7 +2831,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (N1C && N1C->isAllOnesValue()) @@ -2850,8 +2843,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode()) + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -3470,8 +3462,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (or x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3556,7 +3548,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (N1C && N1C->isNullValue()) @@ -3580,8 +3573,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode()) + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. 
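The same right-hand-side canonicalization recurs through visitADD, visitMUL, visitAND, visitOR and visitXOR in this file: the scalar-only N0C test widens so that constant BUILD_VECTOR operands are moved to the RHS as well. The shared shape, copied from the surrounding hunks (Opc standing for the visited node's integer opcode):

// Canonicalize a constant operand to the RHS; vector constants need not be
// splats for this to fire.
if (isConstantIntBuildVectorOrConstantInt(N0) &&
    !isConstantIntBuildVectorOrConstantInt(N1))
  return DAG.getNode(Opc, SDLoc(N), VT, N1, N0);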
@@ -3875,8 +3867,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (xor x, 0) -> x, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) @@ -3899,14 +3891,14 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS - if (N0C && !N1C) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode()) + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) return RXOR; // fold !(x cc y) -> (x !cc y) @@ -4152,8 +4144,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold vector ops ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); // If setcc produces all-one true value then: @@ -4332,8 +4324,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold vector ops ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } @@ -4478,8 +4470,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold vector ops ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; N1C = isConstOrConstSplat(N1); } @@ -4889,7 +4881,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N1_0 = N1->getOperand(0); SDValue N1_1 = N1->getOperand(1); SDValue N1_2 = N1->getOperand(2); - if (N1_2 == N2) { + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), @@ -4908,7 +4900,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2_0 = N2->getOperand(0); SDValue N2_1 = N2->getOperand(1); SDValue N2_2 = N2->getOperand(2); - if (N2_1 == N1) { + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), @@ -7037,7 +7029,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Finally, this must be the case where we are shrinking elements: each input // turns into multiple outputs. 
- bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, NumOutputsPerInput*BV->getNumOperands()); @@ -7055,10 +7046,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) - // Simply turn this into a SCALAR_TO_VECTOR of the new type. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - Ops[0]); OpVal = OpVal.lshr(DstBitSize); } @@ -7206,10 +7193,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) @@ -7400,10 +7386,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) @@ -7552,15 +7537,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // Canonicalize vector constant to RHS. 
- if (N0.getOpcode() == ISD::BUILD_VECTOR && - N1.getOpcode() != ISD::BUILD_VECTOR) - if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) - if (BV0->isConstant()) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 @@ -7568,7 +7546,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 1.0) -> A @@ -7734,10 +7713,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops - if (VT.isVector()) { - SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; - } + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) @@ -8216,11 +8194,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); @@ -8228,11 +8205,10 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); @@ -8240,11 +8216,10 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { SDValue DAGCombiner::visitFFLOOR(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) + if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); @@ -10080,19 +10055,19 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned EarliestNodeUsed = 0; + unsigned LatestNodeUsed = 0; for (unsigned i=0; i < NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } - // The earliest Node in the DAG. - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. 
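In this renaming, the "latest" store is the one with the smallest SequenceNum, and the scan above is a plain minimum-by-key search. The same loop with placeholder types (MemOp and pickLatestUsed are illustrative, not LLVM names):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct MemOp { unsigned SequenceNum; };

    size_t pickLatestUsed(const std::vector<MemOp> &StoreNodes, size_t NumElem) {
      size_t LatestNodeUsed = 0;
      for (size_t i = 1; i < NumElem; ++i)
        if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
          LatestNodeUsed = i; // smaller SequenceNum = later in the DAG
      return LatestNodeUsed;
    }

    int main() {
      std::vector<MemOp> Stores = {{5}, {2}, {7}};
      assert(pickLatestUsed(Stores, Stores.size()) == 1); // SequenceNum 2 wins
    }

StoreNodes[LatestNodeUsed] then supplies the chain for the merged wide store, and CombineTo replaces that node rather than the first one.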
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; SDLoc DL(StoreNodes[0].MemNode); SDValue StoredVal; @@ -10151,17 +10126,17 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, StoreTy); } - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); - // Replace the first store with the new store - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); // ReplaceAllUsesWith will replace all uses that existed when it was @@ -10538,18 +10513,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; - // The earliest Node in the DAG. - unsigned EarliestNodeUsed = 0; - LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; for (unsigned i=1; i<NumElem; ++i) { // Find a chain for the new wide-store operand. Notice that some // of the store nodes that we found may not be selected for inclusion // in the wide store. The chain we use needs to be the chain of the - // earliest store node which is *used* and replaced by the wide store. - if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) - EarliestNodeUsed = i; + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; } + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + // Find if it is better to use vectors or integers to load and store // to memory. EVT JointMemOpVT; @@ -10571,7 +10547,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { false, false, false, FirstLoad->getAlignment()); - SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, + SDValue NewStore = DAG.getStore(LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstInChain->getAlignment()); @@ -10589,12 +10565,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); } - // Replace the first store with the new store. - CombineTo(EarliestOp, NewStore); + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); // Erase all other stores. for (unsigned i = 0; i < NumElem ; ++i) { // Remove all Store nodes. - if (StoreNodes[i].MemNode == EarliestOp) + if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); @@ -11523,6 +11499,68 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { return SDValue(); } +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. 
+ if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector @@ -11538,9 +11576,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); - // Optimize concat_vectors where one of the vectors is undef. - if (N->getNumOperands() == 2 && - N->getOperand(1)->getOpcode() == ISD::UNDEF) { + // Optimize concat_vectors where all but the first of the vectors are undef. + if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -11548,6 +11587,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (In->getOpcode() == ISD::BITCAST && !In->getOperand(0)->getValueType(0).isVector()) { SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + EVT SclTy = Scalar->getValueType(0); if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) @@ -11615,6 +11663,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. 
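combineConcatVectorOfScalars only fires when every operand is a bitcast integer or floating-point scalar (or undef); anything else, such as x86mmx, aborts the combine. A compact model of that AnyInteger/AnyFP bookkeeping, with invented names throughout:

    #include <cassert>
    #include <vector>

    enum class ScalarKind { Int, FP, Other };

    // Returns true if every operand is Int or FP, recording what was seen,
    // mirroring the AnyInteger/AnyFP flags in the hunk above.
    bool classifyOperands(const std::vector<ScalarKind> &Ops,
                          bool &AnyInteger, bool &AnyFP) {
      AnyInteger = AnyFP = false;
      for (ScalarKind K : Ops) {
        if (K == ScalarKind::FP)
          AnyFP = true;
        else if (K == ScalarKind::Int)
          AnyInteger = true;
        else
          return false; // something weird like x86mmx: give up on the combine
      }
      return true;
    }

    int main() {
      bool AnyInt, AnyFP;
      std::vector<ScalarKind> Mixed = {ScalarKind::Int, ScalarKind::FP};
      assert(classifyOperands(Mixed, AnyInt, AnyFP) && AnyInt && AnyFP);
      std::vector<ScalarKind> Bad = {ScalarKind::Other};
      assert(!classifyOperands(Bad, AnyInt, AnyFP));
    }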
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -11676,7 +11728,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // type. if (V->getOperand(0).getValueType() != NVT) return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned Idx = N->getConstantOperandVal(1); unsigned NumElems = NVT.getVectorNumElements(); assert((Idx % NumElems) == 0 && "IDX in concat is not a multiple of the result vector length."); @@ -12001,6 +12053,43 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector<SDValue, 8> Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + // If this shuffle only has a single input that is a bitcasted shuffle, // attempt to merge the 2 shuffles and suitably bitcast the inputs/output // back to their original types. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 223a149..5ffb826 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -425,7 +425,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the second operand is a constant and handle it appropriately. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - uint64_t Imm = CI->getZExtValue(); + uint64_t Imm = CI->getSExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && @@ -1079,11 +1079,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. 
case Intrinsic::donothing: return true; + case Intrinsic::eh_actions: { + unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DI->getVariable(); if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; @@ -1124,6 +1129,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1148,6 +1155,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 291b583..4b8ae32 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -79,12 +80,35 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) { return ExtendKind; } +namespace { +struct WinEHNumbering { + WinEHNumbering(WinEHFuncInfo &FuncInfo) : FuncInfo(FuncInfo), NextState(0) {} + + WinEHFuncInfo &FuncInfo; + int NextState; + + SmallVector<ActionHandler *, 4> HandlerStack; + SmallPtrSet<const Function *, 4> VisitedHandlers; + + int currentEHNumber() const { + return HandlerStack.empty() ? -1 : HandlerStack.back()->getEHState(); + } + + void createUnwindMapEntry(int ToState, ActionHandler *AH); + void createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers); + void processCallSite(ArrayRef<ActionHandler *> Actions, ImmutableCallSite CS); + void calculateStateNumbers(const Function &F); +}; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { Fn = &fn; MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); + MachineModuleInfo &MMI = MF->getMMI(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -178,13 +202,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // during the initial isel pass through the IR so that it is done // in a predictable order. 
if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { - MachineModuleInfo &MMI = MF->getMMI(); - DIVariable DIVar(DI->getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (MMI.hasDebugInfo() && - DIVar && - !DI->getDebugLoc().isUnknown()) { + DIVariable DIVar = DI->getVariable(); + if (MMI.hasDebugInfo() && DIVar && DI->getDebugLoc()) { // Don't handle byval struct arguments or VLAs, for example. // Non-byval arguments are handled here (they refer to the stack // temporary alloca at this point). @@ -252,8 +271,179 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark landing pad blocks. for (BB = Fn->begin(); BB != EB; ++BB) - if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) + if (const auto *Invoke = dyn_cast<InvokeInst>(BB->getTerminator())) MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); + + // Calculate EH numbers for WinEH. + if (fn.hasFnAttribute("wineh-parent")) { + const Function *WinEHParentFn = MMI.getWinEHParent(&fn); + WinEHFuncInfo &FI = MMI.getWinEHFuncInfo(WinEHParentFn); + if (FI.LandingPadStateMap.empty()) { + WinEHNumbering Num(FI); + Num.calculateStateNumbers(*WinEHParentFn); + // Pop everything on the handler stack. + Num.processCallSite(None, ImmutableCallSite()); + } + } +} + +void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) { + WinEHUnwindMapEntry UME; + UME.ToState = ToState; + if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH)) + UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc()); + else + UME.Cleanup = nullptr; + FuncInfo.UnwindMap.push_back(UME); +} + +void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh, + ArrayRef<CatchHandler *> Handlers) { + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (CatchHandler *CH : Handlers) { + WinEHHandlerType HT; + if (CH->getSelector()->isNullValue()) { + HT.Adjectives = 0x40; + HT.TypeDescriptor = nullptr; + } else { + auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts()); + // Selectors are always pointers to GlobalVariables with 'struct' type. + // The struct has two fields, adjectives and a type descriptor. + auto *CS = cast<ConstantStruct>(GV->getInitializer()); + HT.Adjectives = + cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue(); + HT.TypeDescriptor = + cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts()); + } + HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc()); + HT.CatchObjRecoverIdx = CH->getExceptionVarIndex(); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static void print_name(const Value *V) { +#ifndef NDEBUG + if (!V) { + DEBUG(dbgs() << "null"); + return; + } + + if (const auto *F = dyn_cast<Function>(V)) + DEBUG(dbgs() << F->getName()); + else + DEBUG(V->dump()); +#endif +} + +void WinEHNumbering::processCallSite(ArrayRef<ActionHandler *> Actions, + ImmutableCallSite CS) { + int FirstMismatch = 0; + for (int E = std::min(HandlerStack.size(), Actions.size()); FirstMismatch < E; + ++FirstMismatch) { + if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() != + Actions[FirstMismatch]->getHandlerBlockOrFunc()) + break; + delete Actions[FirstMismatch]; + } + + bool EnteringScope = (int)Actions.size() > FirstMismatch; + + // Don't recurse while we are looping over the handler stack. 
Instead, defer + // the numbering of the catch handlers until we are done popping. + SmallVector<CatchHandler *, 4> PoppedCatches; + for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) { + if (auto *CH = dyn_cast<CatchHandler>(HandlerStack.back())) { + PoppedCatches.push_back(CH); + } else { + // Delete cleanup handlers + delete HandlerStack.back(); + } + HandlerStack.pop_back(); + } + + // We need to create a new state number if we are exiting a try scope and we + // will not push any more actions. + int TryHigh = NextState - 1; + if (!EnteringScope && !PoppedCatches.empty()) { + createUnwindMapEntry(currentEHNumber(), nullptr); + ++NextState; + } + + int LastTryLowIdx = 0; + for (int I = 0, E = PoppedCatches.size(); I != E; ++I) { + CatchHandler *CH = PoppedCatches[I]; + if (I + 1 == E || CH->getEHState() != PoppedCatches[I + 1]->getEHState()) { + int TryLow = CH->getEHState(); + auto Handlers = + makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1); + createTryBlockMapEntry(TryLow, TryHigh, Handlers); + LastTryLowIdx = I + 1; + } + } + + for (CatchHandler *CH : PoppedCatches) { + if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) + calculateStateNumbers(*F); + delete CH; + } + + bool LastActionWasCatch = false; + for (size_t I = FirstMismatch; I != Actions.size(); ++I) { + // We can reuse eh states when pushing two catches for the same invoke. + bool CurrActionIsCatch = isa<CatchHandler>(Actions[I]); + // FIXME: Reenable this optimization! + if (CurrActionIsCatch && LastActionWasCatch && false) { + Actions[I]->setEHState(currentEHNumber()); + } else { + createUnwindMapEntry(currentEHNumber(), Actions[I]); + Actions[I]->setEHState(NextState); + NextState++; + DEBUG(dbgs() << "Creating unwind map entry for: ("); + print_name(Actions[I]->getHandlerBlockOrFunc()); + DEBUG(dbgs() << ", " << currentEHNumber() << ")\n"); + } + HandlerStack.push_back(Actions[I]); + LastActionWasCatch = CurrActionIsCatch; + } + + DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: "); + print_name(CS ? CS.getCalledValue() : nullptr); + DEBUG(dbgs() << '\n'); +} + +void WinEHNumbering::calculateStateNumbers(const Function &F) { + auto I = VisitedHandlers.insert(&F); + if (!I.second) + return; // We've already visited this handler, don't renumber it. + + DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n'); + SmallVector<ActionHandler *, 4> ActionList; + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + const auto *CI = dyn_cast<CallInst>(&I); + if (!CI || CI->doesNotThrow()) + continue; + processCallSite(None, CI); + } + const auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + const LandingPadInst *LPI = II->getLandingPadInst(); + auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode()); + if (!ActionsCall) + continue; + assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions); + parseEHActions(ActionsCall, ActionList); + processCallSite(ActionList, II); + ActionList.clear(); + FuncInfo.LandingPadStateMap[LPI] = currentEHNumber(); + } + + FuncInfo.CatchHandlerMaxState[&F] = NextState - 1; } /// clear - Clear out all the function-specific state. 
This returns this @@ -462,8 +652,7 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { + for (auto i : post_order(T)) { if (i->isFloatingPointTy()) { MMI->setUsesVAFloatArgument(true); return; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 93699a7..64d606a 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -650,6 +650,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ece38f3..4a28a4b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4033,6 +4033,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3; @@ -4188,11 +4190,28 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); @@ -4201,10 +4220,40 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp3, DAG.getIntPtrConstant(0))); break; } - case ISD::FLOG2: - case ISD::FEXP2: + case ISD::FMA: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0))); + break; + } + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: case ISD::FLOG: - case ISD::FEXP: { + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cef3fc9..9de85d7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -593,6 +593,7 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp); SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -600,7 +601,6 @@ private: SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); - SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 03c2734..408d5ed 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -379,8 +379,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // There are currently two cases of vector promotion: // 1) Bitcasting a vector of integers to a different type to a vector of the - // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. - // 2) Extending a vector of floats to a vector of the same number oflarger + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. 
+ // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f7e4557..f000902 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1293,7 +1293,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE); + break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); @@ -1304,20 +1306,32 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE); + else + Res = SplitVecOp_UnaryOp(N); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FTRUNC: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = SplitVecOp_UnaryOp(N); break; + case ISD::FTRUNC: + Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC); + break; } } @@ -1581,7 +1595,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N, + unsigned TruncateOp) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, @@ -1624,8 +1639,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -1634,7 +1649,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. 
- return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return DAG.getNode(TruncateOp, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8b54e656..fd0fa31 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -173,7 +173,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGRRList() { + ~ScheduleDAGRRList() override { delete HazardRec; delete AvailableQueue; } @@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // If one or more successors has been unscheduled, then the current // node is no longer available. - if (!TrySU->isAvailable) + if (!TrySU->isAvailable || !TrySU->NodeQueueId) CurSU = AvailableQueue->pop(); else { + // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2cd1f4b..6351fa2 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -44,7 +44,7 @@ namespace llvm { explicit ScheduleDAGSDNodes(MachineFunction &mf); - virtual ~ScheduleDAGSDNodes() {} + ~ScheduleDAGSDNodes() override {} /// Run - perform scheduling. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 418b58e..eee4a4b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -76,7 +76,7 @@ public: HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } - ~ScheduleDAGVLIW() { + ~ScheduleDAGVLIW() override { delete HazardRec; delete AvailableQueue; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b52f648..770f0b2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2851,10 +2851,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // FIXME: Entirely reasonable to perform folding of other unary // operations here as the need arises. break; + case ISD::TRUNCATE: + // Constant build vector truncation can be done with the original scalar + // operands but with a new build vector with the truncated value type. + return getNode(ISD::BUILD_VECTOR, DL, VT, BV->ops()); case ISD::FNEG: case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: case ISD::FP_EXTEND: - case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { // Let the above scalar folding handle the folding of each element. 
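The SplitVecOp_TruncateHelper change above generalizes a two-hop narrowing: truncate each half of the wide input to an intermediate element width, concatenate the halves, then truncate the joined vector to the final type (possibly chaining again on targets with awkward legal-type sets). A scalar-array model of that shape; the widths and helper names are illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint16_t> truncTo16(const std::vector<uint32_t> &In) {
      std::vector<uint16_t> Out;
      for (uint32_t V : In) Out.push_back(static_cast<uint16_t>(V));
      return Out;
    }

    std::vector<uint8_t> truncTo8(const std::vector<uint16_t> &In) {
      std::vector<uint8_t> Out;
      for (uint16_t V : In) Out.push_back(static_cast<uint8_t>(V));
      return Out;
    }

    int main() {
      std::vector<uint32_t> Lo = {0x00000102, 0x00000304};
      std::vector<uint32_t> Hi = {0x00000506, 0x00000708};
      // Truncate the halves to the intermediate element width, ...
      std::vector<uint16_t> Inter = truncTo16(Lo);
      std::vector<uint16_t> HiInter = truncTo16(Hi);
      // ... concatenate them, ...
      Inter.insert(Inter.end(), HiInter.begin(), HiInter.end());
      // ... then truncate the joined vector to the final element type.
      std::vector<uint8_t> Out = truncTo8(Inter);
      assert(Out.size() == 4 && Out[0] == 0x02 && Out[3] == 0x08);
    }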
@@ -2870,6 +2876,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } if (Ops.size() == VT.getVectorNumElements()) return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + break; } } } @@ -3628,7 +3635,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } @@ -3791,12 +3797,27 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); } - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); - Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); + if (VT != Value.getValueType()) { + assert(VT.getVectorElementType() == Value.getValueType() && + "value type should be one vector element here"); + SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); } return Value; @@ -4276,7 +4297,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4334,15 +4355,16 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4389,15 +4411,16 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 
0"); @@ -4446,7 +4469,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -5574,8 +5598,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// For IROrder, we keep the smaller of the two SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && - (OLoc.getDebugLoc() != NLoc)) { + if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); } unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -5885,6 +5908,8 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } @@ -5892,6 +5917,8 @@ SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, DebugLoc DL, unsigned O) { + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); } @@ -5899,6 +5926,8 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, DebugLoc DL, unsigned O) { + assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 6c14e79..32d2aae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -997,14 +998,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, const DbgValueInst *DI = DDI.getDI(); DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - MDNode *Variable = DI->getVariable(); - MDNode *Expr = DI->getExpression(); + MDLocalVariable *Variable = DI->getVariable(); + MDExpression *Expr = DI->getExpression(); + assert(Variable->isValidLocationForIntrinsic(dl) && + "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. 
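The getMemsetValue hunk above widens the i8 fill byte by multiplying with APInt::getSplat's 0x0101... pattern. The same trick on a plain 32-bit integer:

    #include <cassert>
    #include <cstdint>

    uint32_t splatByte32(uint8_t Fill) {
      // Multiplying by 0x01010101 copies the byte into every byte lane,
      // with no carries between lanes.
      return static_cast<uint32_t>(Fill) * 0x01010101u;
    }

    int main() {
      assert(splatByte32(0xAB) == 0xABABABABu);
      assert(splatByte32(0x00) == 0u);
    }

In the patch the multiply only runs when NumBits > 8, and the vector case is then built by broadcasting the widened scalar through a BUILD_VECTOR.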
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), IsIndirect, Offset, dl, DbgSDNodeOrder); @@ -4447,11 +4450,9 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, - MDNode *Variable, - MDNode *Expr, int64_t Offset, - bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( + const Value *V, MDLocalVariable *Variable, MDExpression *Expr, + MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4460,8 +4461,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. + // + // FIXME: Should we be checking DL->inlinedAt() to determine this? DIVariable DV(Variable); - if (DV.isInlinedFnArgument(MF.getFunction())) + if (!DV->getScope()->getSubprogram()->describes(MF.getFunction())) return false; Optional<MachineOperand> Op; @@ -4502,13 +4505,15 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, if (!Op) return false; + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, Op->getReg(), Offset, Variable, Expr)); + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(Offset) .addMetadata(Variable) @@ -4589,9 +4594,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { @@ -4608,8 +4616,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { @@ -4628,19 +4638,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); return nullptr; } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + MDLocalVariable *Variable = DI.getVariable(); + MDExpression *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); - DIVariable DIVar(Variable); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = Variable; if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; @@ -4663,7 +4673,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Address = BCI->getOperand(0); // Parameters are handled specially. bool isParameter = - (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + (DIVariable(Variable)->getTag() == dwarf::DW_TAG_arg_variable || isa<Argument>(Address)); const AllocaInst *AI = dyn_cast<AllocaInst>(Address); @@ -4677,7 +4687,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); return nullptr; } } else if (AI) @@ -4694,7 +4705,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
@@ -4717,14 +4728,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); - DIVariable DIVar(DI.getVariable()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgValueInst should be either null or a DIVariable."); + DIVariable DIVar = DI.getVariable(); if (!DIVar) return nullptr; - MDNode *Variable = DI.getVariable(); - MDNode *Expression = DI.getExpression(); + MDLocalVariable *Variable = DI.getVariable(); + MDExpression *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4745,7 +4754,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, IsIndirect, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), IsIndirect, Offset, dl, SDNodeOrder); @@ -5360,6 +5369,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); + case Intrinsic::eh_actions: + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -5397,14 +5409,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission // is the same on all targets. for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { - AllocaInst *Slot = - cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts()); + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); assert(FuncInfo.StaticAllocaMap.count(Slot) && "can only escape static allocas"); int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(), - Idx); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::FRAME_ALLOC)) .addSym(FrameAllocSym) @@ -5424,8 +5438,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = - MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(), - IdxVal); + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); // Create a TargetExternalSymbol for the label to avoid any target lowering // that would make this PC relative. 
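In the frameescape lowering above, a null pointer argument is skipped but still consumes its index — it marks a hole in the index space, not the end of the list, so the per-slot symbols stay aligned with their argument positions. The shape of that loop with placeholder types (countEscapedSlots is invented for illustration):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int countEscapedSlots(const std::vector<int *> &Args) {
      int Emitted = 0;
      for (size_t Idx = 0; Idx < Args.size(); ++Idx) {
        if (Args[Idx] == nullptr)
          continue; // hole: skip it, but Idx still advances with the list
        ++Emitted;  // a real slot would get a label derived from Idx here
      }
      return Emitted;
    }

    int main() {
      int A = 0, B = 0;
      std::vector<int *> Args = {&A, nullptr, &B};
      assert(countEscapedSlots(Args) == 2);
    }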
@@ -5446,16 +5460,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_begincatch: case Intrinsic::eh_endcatch: llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); - case Intrinsic::eh_unwindhelp: { - AllocaInst *Slot = - cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); - assert(FuncInfo.StaticAllocaMap.count(Slot) && - "can only use static allocas with llvm.eh.unwindhelp"); - int FI = FuncInfo.StaticAllocaMap[Slot]; - // TODO: Save this in the not-yet-existant WinEHFuncInfo struct. - (void)FI; - return nullptr; - } } } @@ -5546,6 +5550,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. + if (Entry.isSRet && isa<Instruction>(V)) + isTailCall = false; } // Check if target-independent constraints permit a tail call here. @@ -7183,6 +7192,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + + // sret demotion isn't compatible with tail-calls, since the sret argument + // points into the callers stack frame. + CLI.IsTailCall = false; } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; @@ -7790,3 +7803,17 @@ MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { return nullptr; return I; } + +/// During lowering new call nodes can be created (such as memset, etc.). +/// Those will become new roots of the current DAG, but complications arise +/// when they are tail calls. In such cases, the call lowering will update +/// the root, but the builder still needs to know that a tail call has been +/// lowered in order to avoid generating an additional return. +void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { + // If the node is null, we do have a tail call. + if (MaybeTC.getNode() != nullptr) + DAG.setRoot(MaybeTC); + else + HasTailCall = true; +} + diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 30240d8..a27f470 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -397,7 +397,6 @@ private: StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr), Guard(nullptr), GuardReg(0) { } - ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are /// initialized implying that we should/are ready to emit a stack protector. @@ -823,12 +822,17 @@ private: /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. - bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, + bool EmitFuncArgumentDbgValue(const Value *V, MDLocalVariable *Variable, + MDExpression *Expr, MDLocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. 
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); + + /// Update the DAG and DAG builder with the relevant information after + /// a new root node has been created which could be a tail call. + void updateDAGForMaybeTailCall(SDValue MaybeTC); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5898da4..636c0a7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -520,22 +520,20 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, since it's usually long and uninteresting. - if (Scope) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } + if (!G) + return; + + MDLocation *L = getDebugLoc(); + if (!L) + return; + + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << "<unknown>"; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 4d2af3f..1e116dd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -500,12 +501,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getDebugVariable(); const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + assert(cast<MDLocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, - Variable, Expr); + BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, LDI->second, Offset, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -523,9 +526,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CopyUseMI = nullptr; break; } if (CopyUseMI) { + // Use MI's debug location, which describes where Variable was + // declared, rather than whatever is attached to CopyUseMI. 
MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); @@ -670,8 +674,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getName().str() + ":" + - FuncInfo->MBB->getBasicBlock()->getName().str(); + BlockName = + (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -929,53 +933,74 @@ void SelectionDAGISel::PrepareEHLandingPad() { const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); MF->getMMI().addPersonality( MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); - if (MF->getMMI().getPersonalityType() == EHPersonality::MSVC_Win64SEH) { - // Make virtual registers and a series of labels that fill in values for the - // clauses. - auto &RI = MF->getRegInfo(); - FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + EHPersonality Personality = MF->getMMI().getPersonalityType(); - // Get all invoke BBs that will unwind into the clause BBs. + if (isMSVCEHPersonality(Personality)) { + SmallVector<MachineBasicBlock *, 4> ClauseBBs; + const IntrinsicInst *Actions = + dyn_cast<IntrinsicInst>(LLVMBB->getFirstInsertionPt()); + // Get all invoke BBs that unwind to this landingpad. SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), MBB->pred_end()); + if (Actions && Actions->getIntrinsicID() == Intrinsic::eh_actions) { + // If this is a call to llvm.eh.actions followed by indirectbr, then we've + // run WinEHPrepare, and we should remove this block from the machine CFG. + // Mark the targets of the indirectbr as landingpads instead. + for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { + MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + } + } else { + // Otherwise, we haven't done the preparation, and we need to invent some + // clause basic blocks that branch into the landingpad. + // FIXME: Remove this code once SEH preparation works. + + // Make virtual registers and a series of labels that fill in values for + // the clauses. + auto &RI = MF->getRegInfo(); + FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + + // Emit separate machine basic blocks with separate labels for each clause + // before the main landing pad block. + MachineInstrBuilder SelectorPHI = BuildMI( + *MBB, MBB->begin(), SDB->getCurDebugLoc(), + TII->get(TargetOpcode::PHI), FuncInfo->ExceptionSelectorVirtReg); + for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { + // Skip filter clauses, we can't implement them. + if (LPadInst->isFilter(I)) + continue; - // Emit separate machine basic blocks with separate labels for each clause - // before the main landing pad block. - MachineInstrBuilder SelectorPHI = BuildMI( - *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), - FuncInfo->ExceptionSelectorVirtReg); - for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { - // Skip filter clauses, we can't implement them yet. 
- if (LPadInst->isFilter(I)) - continue; - - MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); - MF->insert(MBB, ClauseBB); + MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); + MF->insert(MBB, ClauseBB); - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); + // Add the edge from the invoke to the clause. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); - GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); + GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); - // Start the BB with a label. - MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); - BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) - .addSym(ClauseLabel); + // Start the BB with a label. + MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); + BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) + .addSym(ClauseLabel); - // Construct a simple BB that defines a register with the typeid constant. - FuncInfo->MBB = ClauseBB; - FuncInfo->InsertPt = ClauseBB->end(); - unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); - CurDAG->setRoot(SDB->getRoot()); - SDB->clear(); - CodeGenAndEmitDAG(); + // Construct a simple BB that defines a register with the typeid + // constant. + FuncInfo->MBB = ClauseBB; + FuncInfo->InsertPt = ClauseBB->end(); + unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); - // Add the typeid virtual register to the phi in the main landing pad. - SelectorPHI.addReg(VReg).addMBB(ClauseBB); + // Add the typeid virtual register to the phi in the main landing pad. + SelectorPHI.addReg(VReg).addMBB(ClauseBB); + } } // Remove the edge from the invoke to the lpad. @@ -986,6 +1011,12 @@ void SelectionDAGISel::PrepareEHLandingPad() { // pad block. FuncInfo->MBB = MBB; FuncInfo->InsertPt = MBB->end(); + + // Transfer EH state number assigned to the IR block to the MBB. + if (Personality == EHPersonality::MSVC_CXX) { + WinEHFuncInfo &FI = MF->getMMI().getWinEHFuncInfo(MF->getFunction()); + MF->getMMI().addWinEHState(MBB, FI.LandingPadStateMap[LPadInst]); + } return; } @@ -1165,7 +1196,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (FuncInfo->MBB->isLandingPad()) + if (LLVMBB->isLandingPad()) PrepareEHLandingPad(); // Before doing SelectionDAG ISel, see if FastISel has been requested. @@ -2552,7 +2583,7 @@ public: SelectionDAG::DAGUpdateListener(DAG), RecordedNodes(RN), MatchScopes(MS) { } - void NodeDeleted(SDNode *N, SDNode *E) { + void NodeDeleted(SDNode *N, SDNode *E) override { // Some early-returns here to avoid the search if we deleted the node or // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we // do, so it's unnecessary to update matching state at that point). 
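The updateDAGForMaybeTailCall helper added to SelectionDAGBuilder above pairs with the mem-intrinsic lowering entry points, which hand back an SDValue whose node is null when the operation was emitted as a tail call. A minimal sketch of the intended calling pattern, assuming the usual visitIntrinsicCall locals (sdl, Op1..Op3, Align, isVol are stand-in names, not part of this patch) and a getMemcpy overload carrying an isTailCall flag as in this revision:

    // Sketch only: Op1/Op2/Op3 are the lowered dst/src/size operands.
    SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                               /*AlwaysInline=*/false, /*isTailCall=*/true,
                               MachinePointerInfo(I.getArgOperand(0)),
                               MachinePointerInfo(I.getArgOperand(1)));
    // A null node means the copy became a tail call: there is no new root to
    // install, and no additional return may be emitted for this block.
    updateDAGForMaybeTailCall(MC);
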
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3cc7a98..a9ffa72 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -397,10 +397,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming, Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); } else { // Otherwise, locate a spill slot and explicitly spill it so it // can be found by the runtime later. We currently do not support @@ -441,27 +442,25 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // heap. This is basically just here to help catch errors during statepoint // insertion. TODO: This should actually be in the Verifier, but we can't get // to the GCStrategy from there (yet). - if (Builder.GFI) { - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); - } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); - } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); } } #endif @@ -523,6 +522,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } + + // If there are any explicit spill slots passed to the statepoint, record + // them, but otherwise do not do anything special. These are user provided + // allocas and give control over placement to the consumer. 
In this case, + // it is the contents of the slot which may get updated, not the pointer to + // the alloca + for (Value *V : StatepointSite.gc_args()) { + SDValue Incoming = Builder.getValue(V); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); + + } + } } void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { @@ -565,10 +579,8 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // TODO: This if should become an assert. For now, we allow the GCStrategy // to be optional for backwards compatibility. This will only last a short // period (i.e. a couple of weeks). - if (GFI) { - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); - } + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); #endif // Lower statepoint vmstate and gcstate arguments @@ -614,7 +626,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // Add a leading constant argument with the Flags and the calling convention // masked together CallingConv::ID CallConv = CS.getCallingConv(); - int Flags = dyn_cast<ConstantInt>(CS.getArgument(2))->getZExtValue(); + int Flags = cast<ConstantInt>(CS.getArgument(2))->getZExtValue(); assert(Flags == 0 && "not expected to be used"); Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); Ops.push_back( diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index 66a6a3c..7c0b2bb 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -239,7 +239,7 @@ Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { Constant *GEPIndices[2] = { ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; - return ConstantExpr::getGetElementPtr(GV, GEPIndices); + return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices); } Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { @@ -249,7 +249,7 @@ Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - return StructType::create(EltTys, "gc_stackentry." + F.getName().str()); + return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str()); } /// doInitialization - If this module uses the GC intrinsics, find them now. 
If diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 2335a88..0635173 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { const TargetMachine *TM; + Type *doubleUnderDataTy; + Type *doubleUnderJBufTy; Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; @@ -93,12 +95,14 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); - FunctionContextTy = StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf + doubleUnderDataTy = ArrayType::get(Int32Ty, 4); + doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + doubleUnderDataTy, // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + doubleUnderJBufTy, // __jbuf nullptr); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), @@ -204,16 +208,17 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. - Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); + Value *FCData = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. - Value *ExceptionAddr = - Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); + Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = - Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); + Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -223,15 +228,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = - Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32( + FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep"); Builder.CreateStore( Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); - Value *LSDAFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 4, "lsda_gep"); + Value *LSDAFieldPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep"); Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); return FuncCtx; @@ -400,16 +406,19 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. 
- Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep"); + Value *JBufPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep"); // Save the frame pointer. - Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep"); + Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0, + "jbuf_fp_gep"); Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); // Save the stack pointer. - Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep"); + Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2, + "jbuf_sp_gep"); Val = Builder.CreateCall(StackAddrFn, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index 622361e..03dd58d 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -70,7 +70,7 @@ public: static char ID; // Pass identification, replacement for typeid. SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} - ~SpillPlacement() { releaseMemory(); } + ~SpillPlacement() override { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and /// exit. diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 7572803..2bf2d64 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -464,7 +464,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { continue; if (SlotRemap.count(VI.Slot)) { DEBUG(dbgs() << "Remapping debug info for [" - << DIVariable(VI.Var).getName() << "].\n"); + << cast<MDLocalVariable>(VI.Var)->getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 58a6d52..2162a51 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1246,20 +1246,13 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); } - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + // Decide how to handle f32. If the target does not have native f32 support, + // expand it to i32 and we will be generating soft float library calls. 
if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } if (!isTypeLegal(MVT::f16)) { diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index bcf2aa7..5b795e4 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -245,9 +245,11 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro"; } -static const MCSectionELF *selectELFSectionForGlobal( - MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang, - const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) { +static const MCSectionELF * +selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, + unsigned Flags, unsigned *NextUniqueID) { unsigned EntrySize = 0; if (Kind.isMergeableCString()) { if (Kind.isMergeable2ByteCString()) { @@ -297,9 +299,13 @@ static const MCSectionELF *selectELFSectionForGlobal( Name.push_back('.'); TM.getNameWithPrefix(Name, GV, Mang, true); } + unsigned UniqueID = ~0; + if (EmitUniqueSection && !UniqueSectionNames) { + UniqueID = *NextUniqueID; + (*NextUniqueID)++; + } return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, - EntrySize, Group, - EmitUniqueSection && !UniqueSectionNames); + EntrySize, Group, UniqueID); } const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( @@ -319,7 +325,7 @@ const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( EmitUniqueSection |= GV->hasComdat(); return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, - EmitUniqueSection, Flags); + EmitUniqueSection, Flags, &NextUniqueID); } const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( @@ -332,7 +338,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( return ReadOnlySection; return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), - Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC); + Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + &NextUniqueID); } bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index ab0f96e..35b944e 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -33,6 +35,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" 
#include <memory> using namespace llvm; @@ -49,14 +52,15 @@ namespace { // frame allocation structure. typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap; -typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; +// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't +// quite null. +AllocaInst *getCatchObjectSentinel() { + return static_cast<AllocaInst *>(nullptr) + 1; +} -enum ActionType { Catch, Cleanup }; +typedef SmallSet<BasicBlock *, 4> VisitedBlockSet; class LandingPadActions; -class ActionHandler; -class CatchHandler; -class CleanupHandler; class LandingPadMap; typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy; @@ -66,7 +70,7 @@ class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID) {} + : FunctionPass(ID), DT(nullptr) {} bool runOnFunction(Function &Fn) override; @@ -81,31 +85,62 @@ public: private: bool prepareExceptionHandlers(Function &F, SmallVectorImpl<LandingPadInst *> &LPads); + void promoteLandingPadValues(LandingPadInst *LPad); + void completeNestedLandingPad(Function *ParentFn, + LandingPadInst *OutlinedLPad, + const LandingPadInst *OriginalLPad, + FrameVarInfoMap &VarInfo); bool outlineHandler(ActionHandler *Action, Function *SrcFn, LandingPadInst *LPad, BasicBlock *StartBB, FrameVarInfoMap &VarInfo); + void addStubInvokeToHandlerIfNeeded(Function *Handler, Value *PersonalityFn); void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, VisitedBlockSet &VisitedBlocks); - CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB); + void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, + BasicBlock *EndBB); void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); // All fields are reset by runOnFunction. + DominatorTree *DT; EHPersonality Personality; CatchHandlerMapTy CatchHandlerMap; CleanupHandlerMapTy CleanupHandlerMap; - DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; + DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps; + + // This maps landing pad instructions found in outlined handlers to + // the landing pad instruction in the parent function from which they + // were cloned. The cloned/nested landing pad is used as the key + // because the landing pad may be cloned into multiple handlers. + // This map will be used to add the llvm.eh.actions call to the nested + // landing pads after all handlers have been outlined. + DenseMap<LandingPadInst *, const LandingPadInst *> NestedLPtoOriginalLP; + + // This maps blocks in the parent function which are destinations of + // catch handlers to cloned blocks in (other) outlined handlers. This + // handles the case where a nested landing pads has a catch handler that + // returns to a handler function rather than the parent function. + // The original block is used as the key here because there should only + // ever be one handler function from which the cloned block is not pruned. + // The original block will be pruned from the parent function after all + // handlers have been outlined. This map will be used to adjust the + // return instructions of handlers which return to the block that was + // outlined into a handler. This is done after all handlers have been + // outlined but before the outlined code is pruned from the parent function. 
+ DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; }; class WinEHFrameVariableMaterializer : public ValueMaterializer { public: WinEHFrameVariableMaterializer(Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo); - ~WinEHFrameVariableMaterializer() {} + ~WinEHFrameVariableMaterializer() override {} + + Value *materializeValueFor(Value *V) override; - virtual Value *materializeValueFor(Value *V) override; + void escapeCatchObject(Value *V); private: FrameVarInfoMap &FrameVarInfo; @@ -119,42 +154,23 @@ public: bool isInitialized() { return OriginLPad != nullptr; } - bool mapIfEHPtrLoad(const LoadInst *Load) { - return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs); - } - bool mapIfSelectorLoad(const LoadInst *Load) { - return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs); - } - + bool isOriginLandingPadBlock(const BasicBlock *BB) const; bool isLandingPadSpecificInst(const Instruction *Inst) const; - void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const; + void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const; private: - bool mapIfEHLoad(const LoadInst *Load, - SmallVectorImpl<const StoreInst *> &Stores, - SmallVectorImpl<const Value *> &StoreAddrs); - const LandingPadInst *OriginLPad; // We will normally only see one of each of these instructions, but // if more than one occurs for some reason we can handle that. TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs; TinyPtrVector<const ExtractValueInst *> ExtractedSelectors; - - // In optimized code, there will typically be at most one instance of - // each of the following, but in unoptimized IR it is not uncommon - // for the values to be stored, loaded and then stored again. In that - // case we will create a second entry for each store and store address. 
- SmallVector<const StoreInst *, 2> EHPtrStores; - SmallVector<const StoreInst *, 2> SelectorStores; - SmallVector<const Value *, 2> EHPtrStoreAddrs; - SmallVector<const Value *, 2> SelectorStoreAddrs; }; class WinEHCloningDirectorBase : public CloningDirector { public: - WinEHCloningDirectorBase(Function *HandlerFn, - FrameVarInfoMap &VarInfo, + WinEHCloningDirectorBase(Function *HandlerFn, FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) : Materializer(HandlerFn, VarInfo), SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), @@ -180,6 +196,9 @@ public: virtual CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) = 0; + virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) = 0; ValueMaterializer *getValueMaterializer() override { return &Materializer; } @@ -192,11 +211,13 @@ protected: class WinEHCatchDirector : public WinEHCloningDirectorBase { public: - WinEHCatchDirector(Function *CatchFn, Value *Selector, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + WinEHCatchDirector( + Function *CatchFn, Value *Selector, FrameVarInfoMap &VarInfo, + LandingPadMap &LPadMap, + DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPads) : WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap), CurrentSelector(Selector->stripPointerCasts()), - ExceptionObjectVar(nullptr) {} + ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads) {} CloningAction handleBeginCatch(ValueToValueMapTy &VMap, const Instruction *Inst, @@ -210,21 +231,28 @@ public: BasicBlock *NewBB) override; CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) override; + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; - const Value *getExceptionVar() { return ExceptionObjectVar; } + Value *getExceptionVar() { return ExceptionObjectVar; } TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } private: Value *CurrentSelector; - const Value *ExceptionObjectVar; + Value *ExceptionObjectVar; TinyPtrVector<BasicBlock *> ReturnTargets; + + // This will be a reference to the field of the same name in the WinEHPrepare + // object which instantiates this WinEHCatchDirector object. + DenseMap<LandingPadInst *, const LandingPadInst *> &NestedLPtoOriginalLP; }; class WinEHCleanupDirector : public WinEHCloningDirectorBase { public: - WinEHCleanupDirector(Function *CleanupFn, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) + WinEHCleanupDirector(Function *CleanupFn, FrameVarInfoMap &VarInfo, + LandingPadMap &LPadMap) : WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {} CloningAction handleBeginCatch(ValueToValueMapTy &VMap, @@ -239,66 +267,9 @@ public: BasicBlock *NewBB) override; CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) override; -}; - -class ActionHandler { -public: - ActionHandler(BasicBlock *BB, ActionType Type) - : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {} - - ActionType getType() const { return Type; } - BasicBlock *getStartBlock() const { return StartBB; } - - bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; } - - void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; } - Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; } - -private: - BasicBlock *StartBB; - ActionType Type; - - // Can be either a BlockAddress or a Function depending on the EH personality. 
- Constant *HandlerBlockOrFunc; -}; - -class CatchHandler : public ActionHandler { -public: - CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB) - : ActionHandler(BB, ActionType::Catch), Selector(Selector), - NextBB(NextBB), ExceptionObjectVar(nullptr) {} - - // Method for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const ActionHandler *H) { - return H->getType() == ActionType::Catch; - } - - Constant *getSelector() const { return Selector; } - BasicBlock *getNextBB() const { return NextBB; } - - const Value *getExceptionVar() { return ExceptionObjectVar; } - TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; } - - void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; } - void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) { - ReturnTargets = Targets; - } - -private: - Constant *Selector; - BasicBlock *NextBB; - const Value *ExceptionObjectVar; - TinyPtrVector<BasicBlock *> ReturnTargets; -}; - -class CleanupHandler : public ActionHandler { -public: - CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {} - - // Method for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const ActionHandler *H) { - return H->getType() == ActionType::Cleanup; - } + CloningAction handleLandingPad(ValueToValueMapTy &VMap, + const LandingPadInst *LPad, + BasicBlock *NewBB) override; }; class LandingPadActions { @@ -313,6 +284,7 @@ public: bool includesCleanup() const { return HasCleanupHandlers; } + SmallVectorImpl<ActionHandler *> &actions() { return Actions; } SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); } SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); } @@ -336,8 +308,8 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { // FIXME: Remove this once the backend can handle the prepared IR. static cl::opt<bool> -SEHPrepare("sehprepare", cl::Hidden, - cl::desc("Prepare functions with SEH personalities")); + SEHPrepare("sehprepare", cl::Hidden, + cl::desc("Prepare functions with SEH personalities")); bool WinEHPrepare::runOnFunction(Function &Fn) { SmallVector<LandingPadInst *, 4> LPads; @@ -360,6 +332,8 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { if (!isMSVCEHPersonality(Personality)) return false; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) { // Replace all resume instructions with unreachable. // FIXME: Remove this once the backend can handle the prepared IR. @@ -375,11 +349,11 @@ bool WinEHPrepare::runOnFunction(Function &Fn) { return true; } -bool WinEHPrepare::doFinalization(Module &M) { - return false; -} +bool WinEHPrepare::doFinalization(Module &M) { return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +} bool WinEHPrepare::prepareExceptionHandlers( Function &F, SmallVectorImpl<LandingPadInst *> &LPads) { @@ -422,9 +396,14 @@ bool WinEHPrepare::prepareExceptionHandlers( if (LPadHasActionList) continue; + // If either of the values in the aggregate returned by the landing pad is + // extracted and stored to memory, promote the stored value to a register. 
+ promoteLandingPadValues(LPad); + LandingPadActions Actions; mapLandingPadBlocks(LPad, Actions); + HandlersOutlined |= !Actions.actions().empty(); for (ActionHandler *Action : Actions) { if (Action->hasBeenProcessed()) continue; @@ -436,24 +415,17 @@ bool WinEHPrepare::prepareExceptionHandlers( if (isAsynchronousEHPersonality(Personality)) { if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { processSEHCatchHandler(CatchAction, StartBB); - HandlersOutlined = true; continue; } } - if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) { - HandlersOutlined = true; - } - } // End for each Action - - // FIXME: We need a guard against partially outlined functions. - if (!HandlersOutlined) - continue; + outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); + } // Replace the landing pad with a new llvm.eh.action based landing pad. BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB); assert(!isa<PHINode>(LPadBB->begin())); - Instruction *NewLPad = LPad->clone(); + auto *NewLPad = cast<LandingPadInst>(LPad->clone()); NewLPadBB->getInstList().push_back(NewLPad); while (!pred_empty(LPadBB)) { auto *pred = *pred_begin(LPadBB); @@ -461,6 +433,19 @@ bool WinEHPrepare::prepareExceptionHandlers( Invoke->setUnwindDest(NewLPadBB); } + // If anyone is still using the old landingpad value, just give them undef + // instead. The eh pointer and selector values are not real. + LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); + + // Replace the mapping of any nested landing pad that previously mapped + // to this landing pad with a referenced to the cloned version. + for (auto &LPadPair : NestedLPtoOriginalLP) { + const LandingPadInst *OriginalLPad = LPadPair.second; + if (OriginalLPad == LPad) { + LPadPair.second = NewLPad; + } + } + // Replace uses of the old lpad in phis with this block and delete the old // block. LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB); @@ -474,11 +459,17 @@ bool WinEHPrepare::prepareExceptionHandlers( if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); ActionArgs.push_back(CatchAction->getSelector()); + // Find the frame escape index of the exception object alloca in the + // parent. + int FrameEscapeIdx = -1; Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar()); - if (EHObj) - ActionArgs.push_back(EHObj); - else - ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType)); + if (EHObj && !isa<ConstantPointerNull>(EHObj)) { + auto I = FrameVarInfo.find(EHObj); + assert(I != FrameVarInfo.end() && + "failed to map llvm.eh.begincatch var"); + FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I); + } + ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx)); } else { ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); } @@ -502,6 +493,15 @@ bool WinEHPrepare::prepareExceptionHandlers( if (!HandlersOutlined) return false; + // Replace any nested landing pad stubs with the correct action handler. + // This must be done before we remove unreachable blocks because it + // cleans up references to outlined blocks that will be deleted. + for (auto &LPadPair : NestedLPtoOriginalLP) + completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo); + NestedLPtoOriginalLP.clear(); + + F.addFnAttr("wineh-parent", F.getName()); + // Delete any blocks that were only used by handlers that were outlined above. 
removeUnreachableBlocks(F); @@ -554,26 +554,26 @@ bool WinEHPrepare::prepareExceptionHandlers( ++InsertPt; ParentAlloca = new AllocaInst(ParentInst->getType(), nullptr, - ParentInst->getName() + ".reg2mem", InsertPt); + ParentInst->getName() + ".reg2mem", + AllocaInsertPt); new StoreInst(ParentInst, ParentAlloca, InsertPt); } else { - ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst); + ParentAlloca = DemoteRegToStack(*ParentInst, true, AllocaInsertPt); } } } - // If the parent alloca is no longer used and only one of the handlers used - // it, erase the parent and leave the copy in the outlined handler. - if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) { - ParentAlloca->eraseFromParent(); - continue; - } + // FIXME: We should try to sink unescaped allocas from the parent frame into + // the child frame. If the alloca is escaped, we have to use the lifetime + // markers to ensure that the alloca is only live within the child frame. // Add this alloca to the list of things to escape. AllocasToEscape.push_back(ParentAlloca); // Next replace all outlined allocas that are mapped to it. for (AllocaInst *TempAlloca : Allocas) { + if (TempAlloca == getCatchObjectSentinel()) + continue; // Skip catch parameter sentinels. Function *HandlerFn = TempAlloca->getParent()->getParent(); // FIXME: Sink this GEP into the blocks where it is used. Builder.SetInsertPoint(TempAlloca); @@ -601,19 +601,6 @@ bool WinEHPrepare::prepareExceptionHandlers( Builder.SetInsertPoint(&F.getEntryBlock().back()); Builder.CreateCall(FrameEscapeFn, AllocasToEscape); - // Insert an alloca for the EH state in the entry block. On x86, we will also - // insert stores to update the EH state, but on other ISAs, the runtime does - // it for us. - // FIXME: This record is different on x86. - Type *UnwindHelpTy = Type::getInt64Ty(Context); - AllocaInst *UnwindHelp = - new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front()); - Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp); - Function *UnwindHelpFn = - Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp); - Builder.CreateCall(UnwindHelpFn, - Builder.CreateBitCast(UnwindHelp, Int8PtrType)); - // Clean up the handler action maps we created for this function DeleteContainerSeconds(CatchHandlerMap); CatchHandlerMap.clear(); @@ -623,6 +610,125 @@ bool WinEHPrepare::prepareExceptionHandlers( return HandlersOutlined; } +void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { + // If the return values of the landing pad instruction are extracted and + // stored to memory, we want to promote the store locations to reg values. + SmallVector<AllocaInst *, 2> EHAllocas; + + // The landingpad instruction returns an aggregate value. Typically, its + // value will be passed to a pair of extract value instructions and the + // results of those extracts are often passed to store instructions. + // In unoptimized code the stored value will often be loaded and then stored + // again. + for (auto *U : LPad->users()) { + ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); + if (!Extract) + continue; + + for (auto *EU : Extract->users()) { + if (auto *Store = dyn_cast<StoreInst>(EU)) { + auto *AV = cast<AllocaInst>(Store->getPointerOperand()); + EHAllocas.push_back(AV); + } + } + } + + // We can't do this without a dominator tree. + assert(DT); + + if (!EHAllocas.empty()) { + PromoteMemToReg(EHAllocas, *DT); + EHAllocas.clear(); + } + + // After promotion, some extracts may be trivially dead. Remove them. 
+  SmallVector<Value *, 4> Users(LPad->user_begin(), LPad->user_end());
+  for (auto *U : Users)
+    RecursivelyDeleteTriviallyDeadInstructions(U);
+}
+
+void WinEHPrepare::completeNestedLandingPad(Function *ParentFn,
+                                            LandingPadInst *OutlinedLPad,
+                                            const LandingPadInst *OriginalLPad,
+                                            FrameVarInfoMap &FrameVarInfo) {
+  // Get the nested block and erase the unreachable instruction that was
+  // temporarily inserted as its terminator.
+  LLVMContext &Context = ParentFn->getContext();
+  BasicBlock *OutlinedBB = OutlinedLPad->getParent();
+  assert(isa<UnreachableInst>(OutlinedBB->getTerminator()));
+  OutlinedBB->getTerminator()->eraseFromParent();
+  // That should leave OutlinedLPad as the last instruction in its block.
+  assert(&OutlinedBB->back() == OutlinedLPad);
+
+  // The original landing pad will have already had its action intrinsic
+  // built by the outlining loop. We need to clone that into the outlined
+  // location. It may also be necessary to add references to the exception
+  // variables to the outlined handler in which this landing pad is nested
+  // and remap return instructions in the nested handlers that should return
+  // to an address in the outlined handler.
+  Function *OutlinedHandlerFn = OutlinedBB->getParent();
+  BasicBlock::const_iterator II = OriginalLPad;
+  ++II;
+  // The instruction after the landing pad should now be a call to eh.actions.
+  const Instruction *Recover = II;
+  assert(match(Recover, m_Intrinsic<Intrinsic::eh_actions>()));
+  IntrinsicInst *EHActions = cast<IntrinsicInst>(Recover->clone());
+
+  // Remap the exception variables into the outlined function.
+  WinEHFrameVariableMaterializer Materializer(OutlinedHandlerFn, FrameVarInfo);
+  SmallVector<BlockAddress *, 4> ActionTargets;
+  SmallVector<ActionHandler *, 4> ActionList;
+  parseEHActions(EHActions, ActionList);
+  for (auto *Action : ActionList) {
+    auto *Catch = dyn_cast<CatchHandler>(Action);
+    if (!Catch)
+      continue;
+    // The dyn_cast to function here selects C++ catch handlers and skips
+    // SEH catch handlers.
+    auto *Handler = dyn_cast<Function>(Catch->getHandlerBlockOrFunc());
+    if (!Handler)
+      continue;
+    // Visit all the return instructions, looking for places that return
+    // to a location within OutlinedHandlerFn.
+    for (BasicBlock &NestedHandlerBB : *Handler) {
+      auto *Ret = dyn_cast<ReturnInst>(NestedHandlerBB.getTerminator());
+      if (!Ret)
+        continue;
+
+      // Handler functions must always return a block address.
+      BlockAddress *BA = cast<BlockAddress>(Ret->getReturnValue());
+      // The original target will have been in the main parent function,
+      // but if it is the address of a block that has been outlined, it
+      // should be a block that was outlined into OutlinedHandlerFn.
+      assert(BA->getFunction() == ParentFn);
+
+      // Ignore targets that aren't part of OutlinedHandlerFn.
+      if (!LPadTargetBlocks.count(BA->getBasicBlock()))
+        continue;
+
+      // If the return value is the address of a block that we
+      // previously outlined into the parent handler function, replace
+      // the return instruction and add the mapped target to the list
+      // of possible return addresses.
+ BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()]; + assert(MappedBB->getParent() == OutlinedHandlerFn); + BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB); + Ret->eraseFromParent(); + ReturnInst::Create(Context, NewBA, &NestedHandlerBB); + ActionTargets.push_back(NewBA); + } + } + DeleteContainerPointers(ActionList); + ActionList.clear(); + OutlinedBB->getInstList().push_back(EHActions); + + // Insert an indirect branch into the outlined landing pad BB. + IndirectBrInst *IBr = IndirectBrInst::Create(EHActions, 0, OutlinedBB); + // Add the previously collected action targets. + for (auto *Target : ActionTargets) + IBr->addDestination(Target->getBasicBlock()); +} + // This function examines a block to determine whether the block ends with a // conditional branch to a catch handler based on a selector comparison. // This function is used both by the WinEHPrepare::findSelectorComparison() and @@ -657,6 +763,59 @@ static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, return false; } +static BasicBlock *createStubLandingPad(Function *Handler, + Value *PersonalityFn) { + // FIXME: Finish this! + LLVMContext &Context = Handler->getContext(); + BasicBlock *StubBB = BasicBlock::Create(Context, "stub"); + Handler->getBasicBlockList().push_back(StubBB); + IRBuilder<> Builder(StubBB); + LandingPadInst *LPad = Builder.CreateLandingPad( + llvm::StructType::get(Type::getInt8PtrTy(Context), + Type::getInt32Ty(Context), nullptr), + PersonalityFn, 0); + LPad->setCleanup(true); + Builder.CreateUnreachable(); + return StubBB; +} + +// Cycles through the blocks in an outlined handler function looking for an +// invoke instruction and inserts an invoke of llvm.donothing with an empty +// landing pad if none is found. The code that generates the .xdata tables for +// the handler needs at least one landing pad to identify the parent function's +// personality. +void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler, + Value *PersonalityFn) { + ReturnInst *Ret = nullptr; + for (BasicBlock &BB : *Handler) { + TerminatorInst *Terminator = BB.getTerminator(); + // If we find an invoke, there is nothing to be done. + auto *II = dyn_cast<InvokeInst>(Terminator); + if (II) + return; + // If we've already recorded a return instruction, keep looking for invokes. + if (Ret) + continue; + // If we haven't recorded a return instruction yet, try this terminator. + Ret = dyn_cast<ReturnInst>(Terminator); + } + + // If we got this far, the handler contains no invokes. We should have seen + // at least one return. We'll insert an invoke of llvm.donothing ahead of + // that return. + assert(Ret); + BasicBlock *OldRetBB = Ret->getParent(); + BasicBlock *NewRetBB = SplitBlock(OldRetBB, Ret); + // SplitBlock adds an unconditional branch instruction at the end of the + // parent block. We want to replace that with an invoke call, so we can + // erase it now. 
+ OldRetBB->getTerminator()->eraseFromParent(); + BasicBlock *StubLandingPad = createStubLandingPad(Handler, PersonalityFn); + Function *F = + Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); + InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); +} + bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, LandingPadInst *LPad, BasicBlock *StartBB, FrameVarInfoMap &VarInfo) { @@ -680,6 +839,8 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, SrcFn->getName() + ".cleanup", M); } + Handler->addFnAttr("wineh-parent", SrcFn->getName()); + // Generate a standard prolog to setup the frame recovery structure. IRBuilder<> Builder(Context); BasicBlock *Entry = BasicBlock::Create(Context, "entry"); @@ -696,10 +857,14 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, LPadMap.mapLandingPad(LPad); if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { Constant *Sel = CatchAction->getSelector(); - Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap)); - LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1)); + Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap, + NestedLPtoOriginalLP)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + ConstantInt::get(Type::getInt32Ty(Context), 1)); } else { Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap)); + LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), + UndefValue::get(Type::getInt32Ty(Context))); } SmallVector<ReturnInst *, 8> Returns; @@ -735,12 +900,45 @@ bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList()); FirstClonedBB->eraseFromParent(); + // Make sure we can identify the handler's personality later. + addStubInvokeToHandlerIfNeeded(Handler, LPad->getPersonalityFn()); + if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) { WinEHCatchDirector *CatchDirector = reinterpret_cast<WinEHCatchDirector *>(Director.get()); CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); - } + + // Look for blocks that are not part of the landing pad that we just + // outlined but terminate with a call to llvm.eh.endcatch and a + // branch to a block that is in the handler we just outlined. + // These blocks will be part of a nested landing pad that intends to + // return to an address in this handler. This case is best handled + // after both landing pads have been outlined, so for now we'll just + // save the association of the blocks in LPadTargetBlocks. The + // return instructions which are created from these branches will be + // replaced after all landing pads have been outlined. + for (const auto MapEntry : VMap) { + // VMap maps all values and blocks that were just cloned, but dead + // blocks which were pruned will map to nullptr. 
+ if (!isa<BasicBlock>(MapEntry.first) || MapEntry.second == nullptr) + continue; + const BasicBlock *MappedBB = cast<BasicBlock>(MapEntry.first); + for (auto *Pred : predecessors(const_cast<BasicBlock *>(MappedBB))) { + auto *Branch = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1) + continue; + BasicBlock::iterator II = const_cast<BranchInst *>(Branch); + --II; + if (match(cast<Value>(II), m_Intrinsic<Intrinsic::eh_endcatch>())) { + // This would indicate that a nested landing pad wants to return + // to a block that is outlined into two different handlers. + assert(!LPadTargetBlocks.count(MappedBB)); + LPadTargetBlocks[MappedBB] = cast<BasicBlock>(MapEntry.second); + } + } + } + } // End if (CatchAction) Action->setHandlerBlockOrFunc(Handler); @@ -787,9 +985,8 @@ void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { // The landingpad instruction returns an aggregate value. Typically, its // value will be passed to a pair of extract value instructions and the - // results of those extracts are often passed to store instructions. - // In unoptimized code the stored value will often be loaded and then stored - // again. + // results of those extracts will have been promoted to reg values before + // this routine is called. for (auto *U : LPad->users()) { const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U); if (!Extract) @@ -800,36 +997,17 @@ void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { assert((Idx == 0 || Idx == 1) && "Unexpected operation: extracting an unknown landing pad element"); if (Idx == 0) { - // Element 0 doesn't directly corresponds to anything in the WinEH - // scheme. - // It will be stored to a memory location, then later loaded and finally - // the loaded value will be used as the argument to an - // llvm.eh.begincatch - // call. We're tracking it here so that we can skip the store and load. ExtractedEHPtrs.push_back(Extract); } else if (Idx == 1) { - // Element 1 corresponds to the filter selector. We'll map it to 1 for - // matching purposes, but it will also probably be stored to memory and - // reloaded, so we need to track the instuction so that we can map the - // loaded value too. ExtractedSelectors.push_back(Extract); } - - // Look for stores of the extracted values. - for (auto *EU : Extract->users()) { - if (auto *Store = dyn_cast<StoreInst>(EU)) { - if (Idx == 1) { - SelectorStores.push_back(Store); - SelectorStoreAddrs.push_back(Store->getPointerOperand()); - } else { - EHPtrStores.push_back(Store); - EHPtrStoreAddrs.push_back(Store->getPointerOperand()); - } - } - } } } +bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { + return BB->getLandingPadInst() == OriginLPad; +} + bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { if (Inst == OriginLPad) return true; @@ -841,47 +1019,16 @@ bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { if (Inst == Extract) return true; } - for (auto *Store : EHPtrStores) { - if (Inst == Store) - return true; - } - for (auto *Store : SelectorStores) { - if (Inst == Store) - return true; - } - return false; } -void LandingPadMap::remapSelector(ValueToValueMapTy &VMap, - Value *MappedValue) const { - // Remap all selector extract instructions to the specified value. +void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, + Value *SelectorValue) const { + // Remap all landing pad extract instructions to the specified values. 
+ for (auto *Extract : ExtractedEHPtrs) + VMap[Extract] = EHPtrValue; for (auto *Extract : ExtractedSelectors) - VMap[Extract] = MappedValue; -} - -bool LandingPadMap::mapIfEHLoad(const LoadInst *Load, - SmallVectorImpl<const StoreInst *> &Stores, - SmallVectorImpl<const Value *> &StoreAddrs) { - // This makes the assumption that a store we've previously seen dominates - // this load instruction. That might seem like a rather huge assumption, - // but given the way that landingpads are constructed its fairly safe. - // FIXME: Add debug/assert code that verifies this. - const Value *LoadAddr = Load->getPointerOperand(); - for (auto *StoreAddr : StoreAddrs) { - if (LoadAddr == StoreAddr) { - // Handle the common debug scenario where this loaded value is stored - // to a different location. - for (auto *U : Load->users()) { - if (auto *Store = dyn_cast<StoreInst>(U)) { - Stores.push_back(Store); - StoreAddrs.push_back(Store->getPointerOperand()); - } - } - return true; - } - } - return false; + VMap[Extract] = SelectorValue; } CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( @@ -891,40 +1038,13 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( if (LPadMap.isLandingPadSpecificInst(Inst)) return CloningDirector::SkipInstruction; - if (auto *Load = dyn_cast<LoadInst>(Inst)) { - // Look for loads of (previously suppressed) landingpad values. - // The EHPtr load can be mapped to an undef value as it should only be used - // as an argument to llvm.eh.begincatch, but the selector value needs to be - // mapped to a constant value of 1. This value will be used to simplify the - // branching to always flow to the current handler. - if (LPadMap.mapIfSelectorLoad(Load)) { - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - return CloningDirector::SkipInstruction; - } - if (LPadMap.mapIfEHPtrLoad(Load)) { - VMap[Inst] = UndefValue::get(Int8PtrType); - return CloningDirector::SkipInstruction; - } - - // Any other loads just get cloned. - return CloningDirector::CloneInstruction; - } - // Nested landing pads will be cloned as stubs, with just the // landingpad instruction and an unreachable instruction. When // all landingpads have been outlined, we'll replace this with the // llvm.eh.actions call and indirect branch created when the // landing pad was outlined. - if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) { - Instruction *NewInst = NestedLPad->clone(); - if (NestedLPad->hasName()) - NewInst->setName(NestedLPad->getName()); - // FIXME: Store this mapping somewhere else also. - VMap[NestedLPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; + if (auto *LPad = dyn_cast<LandingPadInst>(Inst)) { + return handleLandingPad(VMap, LPad, NewBB); } if (auto *Invoke = dyn_cast<InvokeInst>(Inst)) @@ -944,6 +1064,20 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( return CloningDirector::CloneInstruction; } +CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + Instruction *NewInst = LPad->clone(); + if (LPad->hasName()) + NewInst->setName(LPad->getName()); + // Save this correlation for later processing. 
+ NestedLPtoOriginalLP[cast<LandingPadInst>(NewInst)] = LPad; + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + InstList.push_back(new UnreachableInst(NewBB->getContext())); + return CloningDirector::StopCloningBB; +} + CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { // The argument to the call is some form of the first element of the @@ -958,6 +1092,11 @@ CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( "llvm.eh.begincatch found while " "outlining catch handler."); ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); + if (isa<ConstantPointerNull>(ExceptionObjectVar)) + return CloningDirector::SkipInstruction; + assert(cast<AllocaInst>(ExceptionObjectVar)->isStaticAlloca() && + "catch parameter is not static alloca"); + Materializer.escapeCatchObject(ExceptionObjectVar); return CloningDirector::SkipInstruction; } @@ -971,27 +1110,32 @@ WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, // The end catch call can occur in one of two places: either in a // landingpad block that is part of the catch handlers exception mechanism, - // or at the end of the catch block. If it occurs in a landing pad, we must - // skip it and continue so that the landing pad gets cloned. - // FIXME: This case isn't fully supported yet and shouldn't turn up in any - // of the test cases until it is. - if (IntrinCall->getParent()->isLandingPad()) + // or at the end of the catch block. However, a catch-all handler may call + // end catch from the original landing pad. If the call occurs in a nested + // landing pad block, we must skip it and continue so that the landing pad + // gets cloned. + auto *ParentBB = IntrinCall->getParent(); + if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) return CloningDirector::SkipInstruction; - // If an end catch occurs anywhere else the next instruction should be an - // unconditional branch instruction that we want to replace with a return - // to the the address of the branch target. - const BasicBlock *EndCatchBB = IntrinCall->getParent(); - const TerminatorInst *Terminator = EndCatchBB->getTerminator(); - const BranchInst *Branch = dyn_cast<BranchInst>(Terminator); - assert(Branch && Branch->isUnconditional()); - assert(std::next(BasicBlock::const_iterator(IntrinCall)) == - BasicBlock::const_iterator(Branch)); - - BasicBlock *ContinueLabel = Branch->getSuccessor(0); - ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel), - NewBB); - ReturnTargets.push_back(ContinueLabel); + // If an end catch occurs anywhere else we want to terminate the handler + // with a return to the code that follows the endcatch call. If the + // next instruction is not an unconditional branch, we need to split the + // block to provide a clear target for the return instruction. + BasicBlock *ContinueBB; + auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); + const BranchInst *Branch = dyn_cast<BranchInst>(Next); + if (!Branch || !Branch->isUnconditional()) { + // We're interrupting the cloning process at this location, so the + // const_cast we're doing here will not cause a problem. 
+ ContinueBB = SplitBlock(const_cast<BasicBlock *>(ParentBB), + const_cast<Instruction *>(cast<Instruction>(Next))); + } else { + ContinueBB = Branch->getSuccessor(0); + } + + ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); + ReturnTargets.push_back(ContinueBB); // We just added a terminator to the cloned block. // Tell the caller to stop processing the current basic block so that @@ -1029,6 +1173,20 @@ WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, return CloningDirector::StopCloningBB; } +CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( + ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { + // The MS runtime will terminate the process if an exception occurs in a + // cleanup handler, so we shouldn't encounter landing pads in the actual + // cleanup code, but they may appear in catch blocks. Depending on where + // we started cloning we may see one, but it will get dropped during dead + // block pruning. + Instruction *NewInst = new UnreachableInst(NewBB->getContext()); + VMap[LPad] = NewInst; + BasicBlock::InstListType &InstList = NewBB->getInstList(); + InstList.push_back(NewInst); + return CloningDirector::StopCloningBB; +} + CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { // Catch blocks within cleanup handlers will always be unreachable. @@ -1041,12 +1199,9 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch( ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Catch blocks within cleanup handlers will always be unreachable. - // We'll insert an unreachable instruction now, but it will be pruned - // before the cloning process is complete. - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; + // Cleanup handlers nested within catch handlers may begin with a call to + // eh.endcatch. We can just ignore that instruction. + return CloningDirector::SkipInstruction; } CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor( @@ -1080,6 +1235,9 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( NewCall->setDebugLoc(Invoke->getDebugLoc()); VMap[Invoke] = NewCall; + // Remap the operands. + llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer); + // Insert an unconditional branch to the normal destination. BranchInst::Create(Invoke->getNormalDest(), NewBB); @@ -1088,7 +1246,7 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( // We just added a terminator to the cloned block. // Tell the caller to stop processing the current basic block. 
- return CloningDirector::StopCloningBB; + return CloningDirector::CloneSuccessors; } CloningDirector::CloningAction WinEHCleanupDirector::handleResume( @@ -1104,7 +1262,8 @@ CloningDirector::CloningAction WinEHCleanupDirector::handleResume( WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer( Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo) : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) { - Builder.SetInsertPoint(&OutlinedFn->getEntryBlock()); + BasicBlock *EntryBB = &OutlinedFn->getEntryBlock(); + Builder.SetInsertPoint(EntryBB, EntryBB->getFirstInsertionPt()); } Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { @@ -1139,6 +1298,15 @@ Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { return nullptr; } +void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { + // Catch parameter objects have to live in the parent frame. When we see a use + // of a catch parameter, add a sentinel to the multimap to indicate that it's + // used from another handler. This will prevent us from trying to sink the + // alloca into the handler and ensure that the catch parameter is present in + // the call to llvm.frameescape. + FrameVarInfo[V].push_back(getCatchObjectSentinel()); +} + // This function maps the catch and cleanup handlers that are reachable from the // specified landing pad. The landing pad sequence will have this basic shape: // @@ -1176,13 +1344,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); if (NumClauses == 0) { - // This landing pad contains only cleanup code. - CleanupHandler *Action = new CleanupHandler(BB); - CleanupHandlerMap[BB] = Action; - Actions.insertCleanupHandler(Action); - DEBUG(dbgs() << " Assuming cleanup code in block " << BB->getName() - << "\n"); - assert(LPad->isCleanup()); + findCleanupHandlers(Actions, BB, nullptr); return; } @@ -1202,14 +1364,8 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, // exceptions but code called from catches can. For SEH, it isn't // important if some finally code before a catch-all is executed out of // line or after recovering from the exception. - if (Personality == EHPersonality::MSVC_CXX) { - if (auto *CleanupAction = findCleanupHandler(BB, BB)) { - // Add a cleanup entry to the list - Actions.insertCleanupHandler(CleanupAction); - DEBUG(dbgs() << " Found cleanup code in block " - << CleanupAction->getStartBlock()->getName() << "\n"); - } - } + if (Personality == EHPersonality::MSVC_CXX) + findCleanupHandlers(Actions, BB, BB); // Add the catch handler to the action list. CatchHandler *Action = @@ -1226,13 +1382,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks); // See if there is any interesting code executed before the dispatch. - if (auto *CleanupAction = - findCleanupHandler(BB, CatchAction->getStartBlock())) { - // Add a cleanup entry to the list - Actions.insertCleanupHandler(CleanupAction); - DEBUG(dbgs() << " Found cleanup code in block " - << CleanupAction->getStartBlock()->getName() << "\n"); - } + findCleanupHandlers(Actions, BB, CatchAction->getStartBlock()); assert(CatchAction); ++HandlersFound; @@ -1248,12 +1398,7 @@ void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, // If we didn't wind up in a catch-all, see if there is any interesting code // executed before the resume. 
- if (auto *CleanupAction = findCleanupHandler(BB, BB)) { - // Add a cleanup entry to the list - Actions.insertCleanupHandler(CleanupAction); - DEBUG(dbgs() << " Found cleanup code in block " - << CleanupAction->getStartBlock()->getName() << "\n"); - } + findCleanupHandlers(Actions, BB, BB); // It's possible that some optimization moved code into a landingpad that // wasn't @@ -1313,20 +1458,56 @@ CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB, return nullptr; } -// These are helper functions to combine repeated code from findCleanupHandler. -static CleanupHandler *createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap, - BasicBlock *BB) { +// These are helper functions to combine repeated code from findCleanupHandlers. +static void createCleanupHandler(LandingPadActions &Actions, + CleanupHandlerMapTy &CleanupHandlerMap, + BasicBlock *BB) { CleanupHandler *Action = new CleanupHandler(BB); CleanupHandlerMap[BB] = Action; - return Action; + Actions.insertCleanupHandler(Action); + DEBUG(dbgs() << " Found cleanup code in block " + << Action->getStartBlock()->getName() << "\n"); +} + +static bool isFrameAddressCall(Value *V) { + return match(V, m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0))); +} + +static CallSite matchOutlinedFinallyCall(BasicBlock *BB, + Instruction *MaybeCall) { + // Look for finally blocks that Clang has already outlined for us. + // %fp = call i8* @llvm.frameaddress(i32 0) + // call void @"fin$parent"(iN 1, i8* %fp) + if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) + MaybeCall = MaybeCall->getNextNode(); + CallSite FinallyCall(MaybeCall); + if (!FinallyCall || FinallyCall.arg_size() != 2) + return CallSite(); + if (!match(FinallyCall.getArgument(0), m_SpecificInt(1))) + return CallSite(); + if (!isFrameAddressCall(FinallyCall.getArgument(1))) + return CallSite(); + return FinallyCall; +} + +static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) { + // Skip single ubr blocks. + while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) { + auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); + if (Br && Br->isUnconditional()) + BB = Br->getSuccessor(0); + else + return BB; + } + return BB; } // This function searches starting with the input block for the next block that // contains code that is not part of a catch handler and would not be eliminated // during handler outlining. // -CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, - BasicBlock *EndBB) { +void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions, + BasicBlock *StartBB, BasicBlock *EndBB) { // Here we will skip over the following: // // landing pad prolog: @@ -1343,6 +1524,7 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, // Anything other than an unconditional branch will kick us out of this loop // one way or another. while (BB) { + BB = followSingleUnconditionalBranches(BB); // If we've already scanned this block, don't scan it again. If it is // a cleanup block, there will be an action in the CleanupHandlerMap. // If we've scanned it and it is not a cleanup block, there will be a @@ -1351,7 +1533,12 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, // avoid creating a null entry for blocks we haven't scanned. 
if (CleanupHandlerMap.count(BB)) { if (auto *Action = CleanupHandlerMap[BB]) { - return cast<CleanupHandler>(Action); + Actions.insertCleanupHandler(Action); + DEBUG(dbgs() << " Found cleanup code in block " + << Action->getStartBlock()->getName() << "\n"); + // FIXME: This cleanup might chain into another, and we need to discover + // that. + return; } else { // Here we handle the case where the cleanup handler map contains a // value for this block but the value is a nullptr. This means that @@ -1363,11 +1550,9 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, // would terminate the search for cleanup code, so the unconditional // branch is the only case for which we might need to continue // searching. - if (BB == EndBB) - return nullptr; - BasicBlock *SuccBB; - if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB))) - return nullptr; + BasicBlock *SuccBB = followSingleUnconditionalBranches(BB); + if (SuccBB == BB || SuccBB == EndBB) + return; BB = SuccBB; continue; } @@ -1390,26 +1575,23 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, } // Look for the bare resume pattern: - // %exn2 = load i8** %exn.slot - // %sel2 = load i32* %ehselector.slot - // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0 - // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1 + // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0 + // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1 // resume { i8*, i32 } %lpad.val2 if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) { InsertValueInst *Insert1 = nullptr; InsertValueInst *Insert2 = nullptr; Value *ResumeVal = Resume->getOperand(0); - // If there is only one landingpad, we may use the lpad directly with no - // insertions. - if (isa<LandingPadInst>(ResumeVal)) - return nullptr; - if (!isa<PHINode>(ResumeVal)) { + // If the resume value isn't a phi or landingpad value, it should be a + // series of insertions. Identify them so we can avoid them when scanning + // for cleanups. + if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) { Insert2 = dyn_cast<InsertValueInst>(ResumeVal); if (!Insert2) - return createCleanupHandler(CleanupHandlerMap, BB); + return createCleanupHandler(Actions, CleanupHandlerMap, BB); Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand()); if (!Insert1) - return createCleanupHandler(CleanupHandlerMap, BB); + return createCleanupHandler(Actions, CleanupHandlerMap, BB); } for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); II != IE; ++II) { @@ -1420,66 +1602,133 @@ CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB, continue; if (!Inst->hasOneUse() || (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) { - return createCleanupHandler(CleanupHandlerMap, BB); + return createCleanupHandler(Actions, CleanupHandlerMap, BB); } } - return nullptr; + return; } BranchInst *Branch = dyn_cast<BranchInst>(Terminator); - if (Branch) { - if (Branch->isConditional()) { - // Look for the selector dispatch. 
- // %sel = load i32* %ehselector.slot - // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) - // %matches = icmp eq i32 %sel12, %2 - // br i1 %matches, label %catch14, label %eh.resume - CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition()); - if (!Compare || !Compare->isEquality()) - return createCleanupHandler(CleanupHandlerMap, BB); - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), - IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - if (Inst == Compare || Inst == Branch) - continue; - if (!Inst->hasOneUse() || (Inst->user_back() != Compare)) - return createCleanupHandler(CleanupHandlerMap, BB); - if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) - continue; - if (!isa<LoadInst>(Inst)) - return createCleanupHandler(CleanupHandlerMap, BB); - } - // The selector dispatch block should always terminate our search. - assert(BB == EndBB); - return nullptr; - } else { - // Look for empty blocks with unconditional branches. - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), - IE = BB->end(); - II != IE; ++II) { - Instruction *Inst = II; - if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) - continue; - if (Inst == Branch) - continue; - if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) - continue; - // Anything else makes this interesting cleanup code. - return createCleanupHandler(CleanupHandlerMap, BB); + if (Branch && Branch->isConditional()) { + // Look for the selector dispatch. + // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) + // %matches = icmp eq i32 %sel, %2 + // br i1 %matches, label %catch14, label %eh.resume + CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition()); + if (!Compare || !Compare->isEquality()) + return createCleanupHandler(Actions, CleanupHandlerMap, BB); + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + if (Inst == Compare || Inst == Branch) + continue; + if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) + continue; + return createCleanupHandler(Actions, CleanupHandlerMap, BB); + } + // The selector dispatch block should always terminate our search. + assert(BB == EndBB); + return; + } + + if (isAsynchronousEHPersonality(Personality)) { + // If this is a landingpad block, split the block at the first non-landing + // pad instruction. + Instruction *MaybeCall = BB->getFirstNonPHIOrDbg(); + if (LPadMap) { + while (MaybeCall != BB->getTerminator() && + LPadMap->isLandingPadSpecificInst(MaybeCall)) + MaybeCall = MaybeCall->getNextNode(); + } + + // Look for outlined finally calls. + if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) { + Function *Fin = FinallyCall.getCalledFunction(); + assert(Fin && "outlined finally call should be direct"); + auto *Action = new CleanupHandler(BB); + Action->setHandlerBlockOrFunc(Fin); + Actions.insertCleanupHandler(Action); + CleanupHandlerMap[BB] = Action; + DEBUG(dbgs() << " Found frontend-outlined finally call to " + << Fin->getName() << " in block " + << Action->getStartBlock()->getName() << "\n"); + + // Split the block if there were more interesting instructions and look + // for finally calls in the normal successor block. 
+ BasicBlock *SuccBB = BB; + if (FinallyCall.getInstruction() != BB->getTerminator() && + FinallyCall.getInstruction()->getNextNode() != BB->getTerminator()) { + SuccBB = BB->splitBasicBlock(FinallyCall.getInstruction()->getNextNode()); + } else { + if (FinallyCall.isInvoke()) { + SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())->getNormalDest(); + } else { + SuccBB = BB->getUniqueSuccessor(); + assert(SuccBB && "splitOutlinedFinallyCalls didn't insert a branch"); + } } + BB = SuccBB; if (BB == EndBB) - return nullptr; - // The branch was unconditional. - BB = Branch->getSuccessor(0); + return; + continue; + } + } + + // Anything else is either a catch block or interesting cleanup code. + for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); + II != IE; ++II) { + Instruction *Inst = II; + if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst)) + continue; + // Unconditional branches fall through to this loop. + if (Inst == Branch) continue; - } // End else of if branch was conditional - } // End if Branch + // If this is a catch block, there is no cleanup code to be found. + if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) + return; + // If this is a nested landing pad, it may contain an endcatch call. + if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) + return; + // Anything else makes this interesting cleanup code. + return createCleanupHandler(Actions, CleanupHandlerMap, BB); + } - // Anything else makes this interesting cleanup code. - return createCleanupHandler(CleanupHandlerMap, BB); + // Only unconditional branches in empty blocks should get this far. + assert(Branch && Branch->isUnconditional()); + if (BB == EndBB) + return; + BB = Branch->getSuccessor(0); } - return nullptr; +} + +// This is a public function, declared in WinEHFuncInfo.h and is also +// referenced by WinEHNumbering in FunctionLoweringInfo.cpp. 
+void llvm::parseEHActions(const IntrinsicInst *II, + SmallVectorImpl<ActionHandler *> &Actions) { + for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) { + uint64_t ActionKind = + cast<ConstantInt>(II->getArgOperand(I))->getZExtValue(); + if (ActionKind == /*catch=*/1) { + auto *Selector = cast<Constant>(II->getArgOperand(I + 1)); + ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2)); + int64_t EHObjIndexVal = EHObjIndex->getSExtValue(); + Constant *Handler = cast<Constant>(II->getArgOperand(I + 3)); + I += 4; + auto *CH = new CatchHandler(/*BB=*/nullptr, Selector, /*NextBB=*/nullptr); + CH->setHandlerBlockOrFunc(Handler); + CH->setExceptionVarIndex(EHObjIndexVal); + Actions.push_back(CH); + } else if (ActionKind == 0) { + Constant *Handler = cast<Constant>(II->getArgOperand(I + 1)); + I += 2; + auto *CH = new CleanupHandler(/*BB=*/nullptr); + CH->setHandlerBlockOrFunc(Handler); + Actions.push_back(CH); + } else { + llvm_unreachable("Expected either a catch or cleanup handler!"); + } + } + std::reverse(Actions.begin(), Actions.end()); } diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 9f56214..7846529 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -197,8 +197,7 @@ public: DataAlignmentFactor(DataAlignmentFactor), ReturnAddressRegister(ReturnAddressRegister) {} - ~CIE() { - } + ~CIE() override {} uint64_t getCodeAlignmentFactor() const { return CodeAlignmentFactor; } int64_t getDataAlignmentFactor() const { return DataAlignmentFactor; } @@ -245,8 +244,7 @@ public: InitialLocation(InitialLocation), AddressRange(AddressRange), LinkedCIE(Cie) {} - ~FDE() { - } + ~FDE() override {} CIE *getLinkedCIE() const { return LinkedCIE; } diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index b14af07..0aff327 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -48,9 +48,10 @@ public: reset(); } - uint32_t getChildCount() const { return Args.size(); } + uint32_t getChildCount() const override { return Args.size(); } - std::unique_ptr<PDBSymbolData> getChildAtIndex(uint32_t Index) const { + std::unique_ptr<PDBSymbolData> + getChildAtIndex(uint32_t Index) const override { if (Index >= Args.size()) return nullptr; @@ -58,7 +59,7 @@ public: Args[Index]->getSymIndexId()); } - std::unique_ptr<PDBSymbolData> getNext() { + std::unique_ptr<PDBSymbolData> getNext() override { if (CurIter == Args.end()) return nullptr; const auto &Result = **CurIter; @@ -66,9 +67,9 @@ public: return Session.getConcreteSymbolById<PDBSymbolData>(Result.getSymIndexId()); } - void reset() { CurIter = Args.empty() ? Args.end() : Args.begin(); } + void reset() override { CurIter = Args.empty() ? 
Args.end() : Args.begin(); } - FunctionArgEnumerator *clone() const { + FunctionArgEnumerator *clone() const override { return new FunctionArgEnumerator(Session, Func); } diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 8018206..af3563f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -34,25 +34,27 @@ public: std::unique_ptr<ArgEnumeratorType> ArgEnumerator) : Session(PDBSession), Enumerator(std::move(ArgEnumerator)) {} - uint32_t getChildCount() const { return Enumerator->getChildCount(); } + uint32_t getChildCount() const override { + return Enumerator->getChildCount(); + } - std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const { + std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override { auto FunctionArgSymbol = Enumerator->getChildAtIndex(Index); if (!FunctionArgSymbol) return nullptr; return Session.getSymbolById(FunctionArgSymbol->getTypeId()); } - std::unique_ptr<PDBSymbol> getNext() { + std::unique_ptr<PDBSymbol> getNext() override { auto FunctionArgSymbol = Enumerator->getNext(); if (!FunctionArgSymbol) return nullptr; return Session.getSymbolById(FunctionArgSymbol->getTypeId()); } - void reset() { Enumerator->reset(); } + void reset() override { Enumerator->reset(); } - MyType *clone() const { + MyType *clone() const override { std::unique_ptr<ArgEnumeratorType> Clone(Enumerator->clone()); return new FunctionArgEnumerator(Session, std::move(Clone)); } diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h deleted file mode 100644 index 6453099..0000000 --- a/lib/ExecutionEngine/EventListenerCommon.h +++ /dev/null @@ -1,68 +0,0 @@ -//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Common functionality for JITEventListener implementations -// -//===----------------------------------------------------------------------===// - -#ifndef EVENT_LISTENER_COMMON_H -#define EVENT_LISTENER_COMMON_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Path.h" - -namespace llvm { - -namespace jitprofiling { - -class FilenameCache { - // Holds the filename of each Scope, so that we can pass a null-terminated - // string into oprofile. - DenseMap<const MDNode *, std::string> Filenames; - DenseMap<const MDNode *, std::string> Paths; - - public: - const char *getFilename(MDNode *Scope) { - assert(Scope->isResolved() && "Expected Scope to be resolved"); - std::string &Filename = Filenames[Scope]; - if (Filename.empty()) { - DIScope DIScope(Scope); - Filename = DIScope.getFilename(); - } - return Filename.c_str(); - } - - const char *getFullPath(MDNode *Scope) { - assert(Scope->isResolved() && "Expected Scope to be resolved"); - std::string &P = Paths[Scope]; - if (P.empty()) { - DIScope DIScope(Scope); - StringRef DirName = DIScope.getDirectory(); - StringRef FileName = DIScope.getFilename(); - SmallString<256> FullPath; - if (DirName != "." 
&& DirName != "") { - FullPath = DirName; - } - if (FileName != "") { - sys::path::append(FullPath, FileName); - } - P = FullPath.str(); - } - return P.c_str(); - } -}; - -} // namespace jitprofiling - -} // namespace llvm - -#endif //EVENT_LISTENER_COMMON_H diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index c586ba7..d7038fd 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -18,9 +18,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" @@ -45,11 +47,13 @@ STATISTIC(NumGlobals , "Number of global vars initialized"); ExecutionEngine *(*ExecutionEngine::MCJITCtor)( std::unique_ptr<Module> M, std::string *ErrorStr, - std::unique_ptr<RTDyldMemoryManager> MCJMM, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, std::unique_ptr<TargetMachine> TM) = nullptr; ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)( - std::string *ErrorStr, std::unique_ptr<RTDyldMemoryManager> OrcJMM, + std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, std::unique_ptr<TargetMachine> TM) = nullptr; ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M, @@ -58,8 +62,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M, void JITEventListener::anchor() {} ExecutionEngine::ExecutionEngine(std::unique_ptr<Module> M) - : EEState(*this), - LazyFunctionCreator(nullptr) { + : LazyFunctionCreator(nullptr) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; @@ -151,38 +154,52 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { } -void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { - GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap); - void *OldVal; +uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) { + GlobalAddressMapTy::iterator I = GlobalAddressMap.find(Name); + uint64_t OldVal; // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the // GlobalAddressMap. 
if (I == GlobalAddressMap.end()) - OldVal = nullptr; + OldVal = 0; else { + GlobalAddressReverseMap.erase(I->second); OldVal = I->second; GlobalAddressMap.erase(I); } - GlobalAddressReverseMap.erase(OldVal); return OldVal; } +std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { + MutexGuard locked(lock); + Mangler Mang(DL); + SmallString<128> FullName; + Mang.getNameWithPrefix(FullName, GV->getName()); + return FullName.str(); +} + void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); + addGlobalMapping(getMangledName(GV), (uint64_t) Addr); +} + +void ExecutionEngine::addGlobalMapping(StringRef Name, uint64_t Addr) { + MutexGuard locked(lock); + + assert(!Name.empty() && "Empty GlobalMapping symbol name!"); - DEBUG(dbgs() << "JIT: Map \'" << GV->getName() - << "\' to [" << Addr << "]\n";); - void *&CurVal = EEState.getGlobalAddressMap()[GV]; + DEBUG(dbgs() << "JIT: Map \'" << Name << "\' to [" << Addr << "]\n";); + uint64_t &CurVal = EEState.getGlobalAddressMap()[Name]; assert((!CurVal || !Addr) && "GlobalMapping already established!"); CurVal = Addr; // If we are using the reverse mapping, add it too. if (!EEState.getGlobalAddressReverseMap().empty()) { - AssertingVH<const GlobalValue> &V = - EEState.getGlobalAddressReverseMap()[Addr]; - assert((!V || !GV) && "GlobalMapping already established!"); - V = GV; + std::string &V = EEState.getGlobalAddressReverseMap()[CurVal]; + assert((!V.empty() || !Name.empty()) && + "GlobalMapping already established!"); + V = Name; } } @@ -197,13 +214,19 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { MutexGuard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) - EEState.RemoveMapping(FI); + EEState.RemoveMapping(getMangledName(FI)); for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) - EEState.RemoveMapping(GI); + EEState.RemoveMapping(getMangledName(GI)); +} + +uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, + void *Addr) { + MutexGuard locked(lock); + return updateGlobalMapping(getMangledName(GV), (uint64_t) Addr); } -void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { +uint64_t ExecutionEngine::updateGlobalMapping(StringRef Name, uint64_t Addr) { MutexGuard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = @@ -211,10 +234,10 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { // Deleting from the mapping? if (!Addr) - return EEState.RemoveMapping(GV); + return EEState.RemoveMapping(Name); - void *&CurVal = Map[GV]; - void *OldVal = CurVal; + uint64_t &CurVal = Map[Name]; + uint64_t OldVal = CurVal; if (CurVal && !EEState.getGlobalAddressReverseMap().empty()) EEState.getGlobalAddressReverseMap().erase(CurVal); @@ -222,20 +245,35 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { // If we are using the reverse mapping, add it too. 
if (!EEState.getGlobalAddressReverseMap().empty()) { - AssertingVH<const GlobalValue> &V = - EEState.getGlobalAddressReverseMap()[Addr]; - assert((!V || !GV) && "GlobalMapping already established!"); - V = GV; + std::string &V = EEState.getGlobalAddressReverseMap()[CurVal]; + assert((!V.empty() || !Name.empty()) && + "GlobalMapping already established!"); + V = Name; } return OldVal; } -void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { +uint64_t ExecutionEngine::getAddressToGlobalIfAvailable(StringRef S) { MutexGuard locked(lock); - + uint64_t Address = 0; ExecutionEngineState::GlobalAddressMapTy::iterator I = - EEState.getGlobalAddressMap().find(GV); - return I != EEState.getGlobalAddressMap().end() ? I->second : nullptr; + EEState.getGlobalAddressMap().find(S); + if (I != EEState.getGlobalAddressMap().end()) + Address = I->second; + return Address; +} + + +void *ExecutionEngine::getPointerToGlobalIfAvailable(StringRef S) { + MutexGuard locked(lock); + if (void* Address = (void *) getAddressToGlobalIfAvailable(S)) + return Address; + return nullptr; +} + +void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { + MutexGuard locked(lock); + return getPointerToGlobalIfAvailable(getMangledName(GV)); } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { @@ -244,15 +282,25 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { // If we haven't computed the reverse mapping yet, do so first. if (EEState.getGlobalAddressReverseMap().empty()) { for (ExecutionEngineState::GlobalAddressMapTy::iterator - I = EEState.getGlobalAddressMap().begin(), - E = EEState.getGlobalAddressMap().end(); I != E; ++I) + I = EEState.getGlobalAddressMap().begin(), + E = EEState.getGlobalAddressMap().end(); I != E; ++I) { + StringRef Name = I->first(); + uint64_t Addr = I->second; EEState.getGlobalAddressReverseMap().insert(std::make_pair( - I->second, I->first)); + Addr, Name)); + } } - std::map<void *, AssertingVH<const GlobalValue> >::iterator I = - EEState.getGlobalAddressReverseMap().find(Addr); - return I != EEState.getGlobalAddressReverseMap().end() ? I->second : nullptr; + std::map<uint64_t, std::string>::iterator I = + EEState.getGlobalAddressReverseMap().find((uint64_t) Addr); + + if (I != EEState.getGlobalAddressReverseMap().end()) { + StringRef Name = I->second; + for (unsigned i = 0, e = Modules.size(); i != e; ++i) + if (GlobalValue *GV = Modules[i]->getNamedValue(Name)) + return GV; + } + return nullptr; } namespace { @@ -404,8 +452,9 @@ EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {} EngineBuilder::EngineBuilder(std::unique_ptr<Module> M) : M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr), - OptLevel(CodeGenOpt::Default), MCJMM(nullptr), RelocModel(Reloc::Default), - CMModel(CodeModel::JITDefault), UseOrcMCJITReplacement(false) { + OptLevel(CodeGenOpt::Default), MemMgr(nullptr), Resolver(nullptr), + RelocModel(Reloc::Default), CMModel(CodeModel::JITDefault), + UseOrcMCJITReplacement(false) { // IR module verification is enabled by default in debug builds, and disabled // by default in release builds. 
#ifndef NDEBUG @@ -419,7 +468,21 @@ EngineBuilder::~EngineBuilder() = default; EngineBuilder &EngineBuilder::setMCJITMemoryManager( std::unique_ptr<RTDyldMemoryManager> mcjmm) { - MCJMM = std::move(mcjmm); + auto SharedMM = std::shared_ptr<RTDyldMemoryManager>(std::move(mcjmm)); + MemMgr = SharedMM; + Resolver = SharedMM; + return *this; +} + +EngineBuilder& +EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) { + MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM)); + return *this; +} + +EngineBuilder& +EngineBuilder::setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR) { + Resolver = std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(SR)); return *this; } @@ -434,7 +497,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (MCJMM) { + if (MemMgr) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -456,12 +519,13 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { ExecutionEngine *EE = nullptr; if (ExecutionEngine::OrcMCJITReplacementCtor && UseOrcMCJITReplacement) { - EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MCJMM), + EE = ExecutionEngine::OrcMCJITReplacementCtor(ErrorStr, std::move(MemMgr), + std::move(Resolver), std::move(TheTM)); EE->addModule(std::move(M)); } else if (ExecutionEngine::MCJITCtor) - EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MCJMM), - std::move(TheTM)); + EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, std::move(MemMgr), + std::move(Resolver), std::move(TheTM)); if (EE) { EE->setVerifyModules(VerifyModules); @@ -492,7 +556,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { return getPointerToFunction(F); MutexGuard locked(lock); - if (void *P = EEState.getGlobalAddressMap()[GV]) + if (void* P = getPointerToGlobalIfAvailable(GV)) return P; // Global variable might have been added since interpreter started. 
@@ -502,7 +566,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { else llvm_unreachable("Global hasn't had an address allocated yet!"); - return EEState.getGlobalAddressMap()[GV]; + return getPointerToGlobalIfAvailable(GV); } /// \brief Converts a Constant* into a GenericValue, including handling of @@ -1274,25 +1338,3 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { NumInitBytes += (unsigned)GVSize; ++NumGlobals; } - -ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) - : EE(EE), GlobalAddressMap(this) { -} - -sys::Mutex * -ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { - return &EES->EE.lock; -} - -void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES, - const GlobalValue *Old) { - void *OldVal = EES->GlobalAddressMap.lookup(Old); - EES->GlobalAddressReverseMap.erase(OldVal); -} - -void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *, - const GlobalValue *, - const GlobalValue *) { - llvm_unreachable("The ExecutionEngine doesn't know how to handle a" - " RAUW on a value it has a global mapping for."); -} diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index aaa53f0..22ff311 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -351,7 +351,7 @@ class SimpleBindingMemoryManager : public RTDyldMemoryManager { public: SimpleBindingMemoryManager(const SimpleBindingMMFunctions& Functions, void *Opaque); - virtual ~SimpleBindingMemoryManager(); + ~SimpleBindingMemoryManager() override; uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, diff --git a/lib/ExecutionEngine/GDBRegistrationListener.cpp b/lib/ExecutionEngine/GDBRegistrationListener.cpp index 8ef878c..1ab6203 100644 --- a/lib/ExecutionEngine/GDBRegistrationListener.cpp +++ b/lib/ExecutionEngine/GDBRegistrationListener.cpp @@ -103,7 +103,7 @@ public: /// Unregisters each object that was previously registered and releases all /// internal resources. 
- virtual ~GDBJITRegistrationListener(); + ~GDBJITRegistrationListener() override; /// Creates an entry in the JIT registry for the buffer @p Object, /// which must contain an object file in executable memory with any diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index aa32452..4135900 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" -#include "EventListenerCommon.h" #include "IntelJITEventsWrapper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,7 +28,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -using namespace llvm::jitprofiling; using namespace llvm::object; #define DEBUG_TYPE "amplifier-jit-event-listener" @@ -41,7 +39,6 @@ class IntelJITEventListener : public JITEventListener { std::unique_ptr<IntelJITEventsWrapper> Wrapper; MethodIDMap MethodIDs; - FilenameCache Filenames; typedef SmallVector<const void *, 64> MethodAddressVector; typedef DenseMap<const void *, MethodAddressVector> ObjectMap; diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 2e8eb16..a26740b 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -316,7 +316,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { #define IMPLEMENT_VECTOR_FCMP(OP) \ case Type::VectorTyID: \ - if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ IMPLEMENT_VECTOR_FCMP_T(OP, Float); \ } else { \ IMPLEMENT_VECTOR_FCMP_T(OP, Double); \ @@ -363,7 +363,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, #define MASK_VECTOR_NANS(TY, X,Y, FLAG) \ if (TY->isVectorTy()) { \ - if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ + if (cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \ } else { \ MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \ @@ -536,7 +536,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); Dest.AggregateVal.resize( Src1.AggregateVal.size() ); - if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) Dest.AggregateVal[_i].IntVal = APInt(1, ( (Src1.AggregateVal[_i].FloatVal == @@ -567,7 +567,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); Dest.AggregateVal.resize( Src1.AggregateVal.size() ); - if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) { for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) Dest.AggregateVal[_i].IntVal = APInt(1, ( (Src1.AggregateVal[_i].FloatVal != @@ -713,10 +713,10 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { // Macros to choose appropriate TY: float or double and run operation // execution #define FLOAT_VECTOR_OP(OP) { \ - if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ 
FLOAT_VECTOR_FUNCTION(OP, FloatVal) \ else { \ - if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ + if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \ else { \ dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \ @@ -745,12 +745,12 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { case Instruction::FMul: FLOAT_VECTOR_OP(*) break; case Instruction::FDiv: FLOAT_VECTOR_OP(/) break; case Instruction::FRem: - if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) for (unsigned i = 0; i < R.AggregateVal.size(); ++i) R.AggregateVal[i].FloatVal = fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); else { - if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) for (unsigned i = 0; i < R.AggregateVal.size(); ++i) R.AggregateVal[i].DoubleVal = fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index b022101..e2fe065 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -95,16 +95,15 @@ static ExFunc lookupFunction(const Function *F) { FunctionType *FT = F->getFunctionType(); for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) ExtName += getTypeID(FT->getContainedType(i)); - ExtName += "_" + F->getName().str(); + ExtName += ("_" + F->getName()).str(); sys::ScopedLock Writer(*FunctionsLock); ExFunc FnPtr = (*FuncNames)[ExtName]; if (!FnPtr) - FnPtr = (*FuncNames)["lle_X_" + F->getName().str()]; + FnPtr = (*FuncNames)[("lle_X_" + F->getName()).str()]; if (!FnPtr) // Try calling a generic function... if it exists... - FnPtr = (ExFunc)(intptr_t) - sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" + - F->getName().str()); + FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( + ("lle_X_" + F->getName()).str()); if (FnPtr) ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later return FnPtr; diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 2be9c59..0dc0463 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -108,7 +108,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> { public: explicit Interpreter(std::unique_ptr<Module> M); - ~Interpreter(); + ~Interpreter() override; /// runAtExitHandlers - Run any functions registered by the program's calls to /// atexit(3), which we intercept and store in AtExitHandlers. 
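The EngineBuilder and MCJIT::createJIT changes in this patch split the old single RTDyldMemoryManager argument into a memory manager plus a symbol resolver, sharing one object when it can play both roles. A minimal usage sketch against the post-patch API; the helper name buildEngine and the error-string plumbing are illustrative, and native-target initialization is assumed to happen elsewhere:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/IR/Module.h"

// SectionMemoryManager derives from RTDyldMemoryManager, which after this
// patch is both an MCJITMemoryManager and a RuntimeDyld::SymbolResolver, so
// setMCJITMemoryManager() can hand one object to both roles, exactly as the
// overload shown in the ExecutionEngine.cpp hunk above does internally.
static llvm::ExecutionEngine *buildEngine(std::unique_ptr<llvm::Module> M,
                                          std::string &Err) {
  llvm::EngineBuilder EB(std::move(M));
  EB.setErrorStr(&Err);
  EB.setMCJITMemoryManager(llvm::make_unique<llvm::SectionMemoryManager>());
  return EB.create(); // Null on failure; Err then describes the reason.
}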
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 20b8553..7e37afe 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/MCJIT.h" @@ -41,26 +42,35 @@ static struct RegisterJIT { extern "C" void LLVMLinkInMCJIT() { } -ExecutionEngine *MCJIT::createJIT(std::unique_ptr<Module> M, - std::string *ErrorStr, - std::unique_ptr<RTDyldMemoryManager> MemMgr, - std::unique_ptr<TargetMachine> TM) { +ExecutionEngine* +MCJIT::createJIT(std::unique_ptr<Module> M, + std::string *ErrorStr, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, + std::unique_ptr<TargetMachine> TM) { // Try to register the program as a source of symbols to resolve against. // // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); - std::unique_ptr<RTDyldMemoryManager> MM = std::move(MemMgr); - if (!MM) - MM = std::unique_ptr<SectionMemoryManager>(new SectionMemoryManager()); + if (!MemMgr || !Resolver) { + auto RTDyldMM = std::make_shared<SectionMemoryManager>(); + if (!MemMgr) + MemMgr = RTDyldMM; + if (!Resolver) + Resolver = RTDyldMM; + } - return new MCJIT(std::move(M), std::move(TM), std::move(MM)); + return new MCJIT(std::move(M), std::move(TM), std::move(MemMgr), + std::move(Resolver)); } MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm, - std::unique_ptr<RTDyldMemoryManager> MM) + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver) : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr), - MemMgr(this, std::move(MM)), Dyld(&MemMgr), ObjCache(nullptr) { + MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)), + Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) { // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -221,7 +231,7 @@ void MCJIT::finalizeLoadedModules() { Dyld.registerEHFrames(); // Set page permissions. - MemMgr.finalizeMemory(); + MemMgr->finalizeMemory(); } // FIXME: Rename this. @@ -253,11 +263,11 @@ void MCJIT::finalizeModule(Module *M) { finalizeLoadedModules(); } -uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { +RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) { Mangler Mang(TM->getDataLayout()); SmallString<128> FullName; Mang.getNameWithPrefix(FullName, Name); - return Dyld.getSymbol(FullName).getAddress(); + return Dyld.getSymbol(FullName); } Module *MCJIT::findModuleForSymbol(const std::string &Name, @@ -284,14 +294,17 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, } uint64_t MCJIT::getSymbolAddress(const std::string &Name, - bool CheckFunctionsOnly) -{ + bool CheckFunctionsOnly) { + return findSymbol(Name, CheckFunctionsOnly).getAddress(); +} + +RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name, + bool CheckFunctionsOnly) { MutexGuard locked(lock); // First, check to see if we already have this symbol. 
- uint64_t Addr = getExistingSymbolAddress(Name); - if (Addr) - return Addr; + if (auto Sym = findExistingSymbol(Name)) + return Sym; for (object::OwningBinary<object::Archive> &OB : Archives) { object::Archive *A = OB.getBinary(); @@ -310,9 +323,8 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, // This causes the object file to be loaded. addObjectFile(std::move(OF)); // The address should be here now. - Addr = getExistingSymbolAddress(Name); - if (Addr) - return Addr; + if (auto Sym = findExistingSymbol(Name)) + return Sym; } } } @@ -323,15 +335,18 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, generateCodeForModule(M); // Check the RuntimeDyld table again, it should be there now. - return getExistingSymbolAddress(Name); + return findExistingSymbol(Name); } // If a LazyFunctionCreator is installed, use it to get/create the function. // FIXME: Should we instead have a LazySymbolCreator callback? - if (LazyFunctionCreator) - Addr = (uint64_t)LazyFunctionCreator(Name); + if (LazyFunctionCreator) { + auto Addr = static_cast<uint64_t>( + reinterpret_cast<uintptr_t>(LazyFunctionCreator(Name))); + return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported); + } - return Addr; + return nullptr; } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { @@ -528,7 +543,9 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { if (!isSymbolSearchingDisabled()) { - void *ptr = MemMgr.getPointerToNamedFunction(Name, false); + void *ptr = + reinterpret_cast<void*>( + static_cast<uintptr_t>(Resolver.findSymbol(Name).getAddress())); if (ptr) return ptr; } @@ -566,7 +583,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { void MCJIT::NotifyObjectEmitted(const object::ObjectFile& Obj, const RuntimeDyld::LoadedObjectInfo &L) { MutexGuard locked(lock); - MemMgr.notifyObjectLoaded(this, Obj); + MemMgr->notifyObjectLoaded(this, Obj); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { EventListeners[I]->NotifyObjectEmitted(Obj, L); } @@ -578,15 +595,16 @@ void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) { L->NotifyFreeingObject(Obj); } -uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) { - uint64_t Result = ParentEngine->getSymbolAddress(Name, false); +RuntimeDyld::SymbolInfo +LinkingSymbolResolver::findSymbol(const std::string &Name) { + auto Result = ParentEngine.findSymbol(Name, false); // If the symbol wasn't found and it begins with an underscore, try again // without the underscore. if (!Result && Name[0] == '_') - Result = ParentEngine->getSymbolAddress(Name.substr(1), false); + Result = ParentEngine.findSymbol(Name.substr(1), false); if (Result) return Result; - if (ParentEngine->isSymbolSearchingDisabled()) - return 0; - return ClientMM->getSymbolAddress(Name); + if (ParentEngine.isSymbolSearchingDisabled()) + return nullptr; + return ClientResolver->findSymbol(Name); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index de4a8f6..59e9949 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -16,6 +16,7 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Module.h" @@ -26,59 +27,23 @@ class MCJIT; // functions across modules that it owns. 
It aggregates the memory manager // that is passed in to the MCJIT constructor and defers most functionality // to that object. -class LinkingMemoryManager : public RTDyldMemoryManager { +class LinkingSymbolResolver : public RuntimeDyld::SymbolResolver { public: - LinkingMemoryManager(MCJIT *Parent, - std::unique_ptr<RTDyldMemoryManager> MM) - : ParentEngine(Parent), ClientMM(std::move(MM)) {} + LinkingSymbolResolver(MCJIT &Parent, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver) + : ParentEngine(Parent), ClientResolver(std::move(Resolver)) {} - uint64_t getSymbolAddress(const std::string &Name) override; + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override; - // Functions deferred to client memory manager - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override { - return ClientMM->allocateCodeSection(Size, Alignment, SectionID, SectionName); - } - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override { - return ClientMM->allocateDataSection(Size, Alignment, - SectionID, SectionName, IsReadOnly); - } - - void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO, - uintptr_t DataSizeRW) override { - return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); - } - - bool needsToReserveAllocationSpace() override { - return ClientMM->needsToReserveAllocationSpace(); - } - - void notifyObjectLoaded(ExecutionEngine *EE, - const object::ObjectFile &Obj) override { - ClientMM->notifyObjectLoaded(EE, Obj); - } - - void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - ClientMM->registerEHFrames(Addr, LoadAddr, Size); - } - - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); - } - - bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return ClientMM->finalizeMemory(ErrMsg); + // MCJIT doesn't support logical dylibs. + RuntimeDyld::SymbolInfo + findSymbolInLogicalDylib(const std::string &Name) override { + return nullptr; } private: - MCJIT *ParentEngine; - std::unique_ptr<RTDyldMemoryManager> ClientMM; + MCJIT &ParentEngine; + std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver; }; // About Module states: added->loaded->finalized. 
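For comparison with the LinkingSymbolResolver above, a minimal sketch of a standalone RuntimeDyld::SymbolResolver; the StaticSymbols table is a hypothetical stand-in for whatever the embedder actually resolves against:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include <cstdint>
#include <map>
#include <string>

class TableSymbolResolver : public llvm::RuntimeDyld::SymbolResolver {
public:
  llvm::RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
    // Resolve from a fixed table; a null SymbolInfo reports failure, the
    // same convention LinkingSymbolResolver::findSymbol uses above.
    auto I = StaticSymbols.find(Name);
    if (I == StaticSymbols.end())
      return nullptr;
    return llvm::RuntimeDyld::SymbolInfo(I->second,
                                         llvm::JITSymbolFlags::Exported);
  }

  // No logical-dylib support, matching LinkingSymbolResolver above.
  llvm::RuntimeDyld::SymbolInfo
  findSymbolInLogicalDylib(const std::string &Name) override {
    return nullptr;
  }

private:
  std::map<std::string, uint64_t> StaticSymbols;
};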
@@ -103,7 +68,8 @@ private: class MCJIT : public ExecutionEngine { MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm, - std::unique_ptr<RTDyldMemoryManager> MemMgr); + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver); typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet; @@ -214,7 +180,8 @@ class MCJIT : public ExecutionEngine { std::unique_ptr<TargetMachine> TM; MCContext *Ctx; - LinkingMemoryManager MemMgr; + std::shared_ptr<MCJITMemoryManager> MemMgr; + LinkingSymbolResolver Resolver; RuntimeDyld Dyld; std::vector<JITEventListener*> EventListeners; @@ -238,7 +205,7 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator E); public: - ~MCJIT(); + ~MCJIT() override; /// @name ExecutionEngine interface implementation /// @{ @@ -324,17 +291,22 @@ public: MCJITCtor = createJIT; } - static ExecutionEngine *createJIT(std::unique_ptr<Module> M, - std::string *ErrorStr, - std::unique_ptr<RTDyldMemoryManager> MemMgr, - std::unique_ptr<TargetMachine> TM); + static ExecutionEngine* + createJIT(std::unique_ptr<Module> M, + std::string *ErrorStr, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, + std::unique_ptr<TargetMachine> TM); // @} + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name, + bool CheckFunctionsOnly); + // DEPRECATED - Please use findSymbol instead. // This is not directly exposed via the ExecutionEngine API, but it is // used by the LinkingMemoryManager. uint64_t getSymbolAddress(const std::string &Name, - bool CheckFunctionsOnly); + bool CheckFunctionsOnly); protected: /// emitObject -- Generate a JITed object in memory from the specified module @@ -348,7 +320,7 @@ protected: const RuntimeDyld::LoadedObjectInfo &L); void NotifyFreeingObject(const object::ObjectFile& Obj); - uint64_t getExistingSymbolAddress(const std::string &Name); + RuntimeDyld::SymbolInfo findExistingSymbol(const std::string &Name); Module *findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly); }; diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 9ab4003..23e7662 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Config/config.h" -#include "EventListenerCommon.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" @@ -28,7 +27,6 @@ #include <fcntl.h> using namespace llvm; -using namespace llvm::jitprofiling; using namespace llvm::object; #define DEBUG_TYPE "oprofile-jit-event-listener" diff --git a/lib/ExecutionEngine/Orc/Android.mk b/lib/ExecutionEngine/Orc/Android.mk index 61c1daf..f28f359 100644 --- a/lib/ExecutionEngine/Orc/Android.mk +++ b/lib/ExecutionEngine/Orc/Android.mk @@ -6,6 +6,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ CloneSubModule.cpp \ + ExecutionUtils.cpp \ IndirectionUtils.cpp \ OrcMCJITReplacement.cpp \ OrcTargetSupport.cpp diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt index b0a8445..b38b459 100644 --- a/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMOrcJIT CloneSubModule.cpp + ExecutionUtils.cpp IndirectionUtils.cpp 
OrcMCJITReplacement.cpp OrcTargetSupport.cpp diff --git a/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp new file mode 100644 index 0000000..b7220db --- /dev/null +++ b/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -0,0 +1,102 @@ +//===---- ExecutionUtils.cpp - Utilities for executing functions in Orc ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" + +namespace llvm { +namespace orc { + +CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End) + : InitList( + GV ? dyn_cast_or_null<ConstantArray>(GV->getInitializer()) : nullptr), + I((InitList && End) ? InitList->getNumOperands() : 0) { +} + +bool CtorDtorIterator::operator==(const CtorDtorIterator &Other) const { + assert(InitList == Other.InitList && "Incomparable iterators."); + return I == Other.I; +} + +bool CtorDtorIterator::operator!=(const CtorDtorIterator &Other) const { + return !(*this == Other); +} + +CtorDtorIterator& CtorDtorIterator::operator++() { + ++I; + return *this; +} + +CtorDtorIterator CtorDtorIterator::operator++(int) { + CtorDtorIterator Temp = *this; + ++I; + return Temp; +} + +CtorDtorIterator::Element CtorDtorIterator::operator*() const { + ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(I)); + assert(CS && "Unrecognized type in llvm.global_ctors/llvm.global_dtors"); + + Constant *FuncC = CS->getOperand(1); + Function *Func = nullptr; + + // Extract function pointer, pulling off any casts. + while (FuncC) { + if (Function *F = dyn_cast_or_null<Function>(FuncC)) { + Func = F; + break; + } else if (ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(FuncC)) { + if (CE->isCast()) + FuncC = dyn_cast_or_null<ConstantExpr>(CE->getOperand(0)); + else + break; + } else { + // This isn't anything we recognize. Bail out with Func left set to null. + break; + } + } + + ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + Value *Data = CS->getOperand(2); + return Element(Priority->getZExtValue(), Func, Data); +} + +iterator_range<CtorDtorIterator> getConstructors(const Module &M) { + const GlobalVariable *CtorsList = M.getNamedGlobal("llvm.global_ctors"); + return make_range(CtorDtorIterator(CtorsList, false), + CtorDtorIterator(CtorsList, true)); +} + +iterator_range<CtorDtorIterator> getDestructors(const Module &M) { + const GlobalVariable *DtorsList = M.getNamedGlobal("llvm.global_dtors"); + return make_range(CtorDtorIterator(DtorsList, false), + CtorDtorIterator(DtorsList, true)); +} + +void LocalCXXRuntimeOverrides::runDestructors() { + auto& CXXDestructorDataPairs = DSOHandleOverride; + for (auto &P : CXXDestructorDataPairs) + P.first(P.second); + CXXDestructorDataPairs.clear(); +} + +int LocalCXXRuntimeOverrides::CXAAtExitOverride(DestructorPtr Destructor, + void *Arg, void *DSOHandle) { + auto& CXXDestructorDataPairs = + *reinterpret_cast<CXXDestructorDataPairList*>(DSOHandle); + CXXDestructorDataPairs.push_back(std::make_pair(Destructor, Arg)); + return 0; +} + +} // End namespace orc. +} // End namespace llvm. 
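A short sketch of consuming the iterators that the new ExecutionUtils.cpp above defines; the Element field names (Priority, Func) are assumed from the operator*() construction, and the printing itself is purely illustrative:

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Walk the llvm.global_ctors entries of a module and report each recognized
// constructor; entries whose function pointer could not be recovered are
// skipped, matching the null-Func convention of operator*() above.
static void dumpModuleCtors(const llvm::Module &M) {
  for (const auto &E : llvm::orc::getConstructors(M)) {
    if (!E.Func)
      continue;
    llvm::outs() << "ctor priority " << E.Priority << ": "
                 << E.Func->getName() << "\n";
  }
}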
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 8cf490f..ebeedef 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -14,17 +14,25 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include <set> +#include <sstream> namespace llvm { namespace orc { -GlobalVariable* createImplPointer(Function &F, const Twine &Name, - Constant *Initializer) { - assert(F.getParent() && "Function isn't in a module."); +Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { + Constant *AddrIntVal = + ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr); + Constant *AddrPtrVal = + ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal, + PointerType::get(&FT, 0)); + return AddrPtrVal; +} + +GlobalVariable* createImplPointer(PointerType &PT, Module &M, + const Twine &Name, Constant *Initializer) { if (!Initializer) - Initializer = Constant::getNullValue(F.getType()); - Module &M = *F.getParent(); - return new GlobalVariable(M, F.getType(), false, GlobalValue::ExternalLinkage, + Initializer = Constant::getNullValue(&PT); + return new GlobalVariable(M, &PT, false, GlobalValue::ExternalLinkage, Initializer, Name, nullptr, GlobalValue::NotThreadLocal, 0, true); } @@ -44,8 +52,41 @@ void makeStub(Function &F, GlobalVariable &ImplPointer) { Builder.CreateRet(Call); } +// Utility class for renaming global values and functions during partitioning. +class GlobalRenamer { +public: + + static bool needsRenaming(const Value &New) { + if (!New.hasName() || New.getName().startswith("\01L")) + return true; + return false; + } + + const std::string& getRename(const Value &Orig) { + // See if we have a name for this global. + { + auto I = Names.find(&Orig); + if (I != Names.end()) + return I->second; + } + + // Nope. Create a new one. + // FIXME: Use a more robust uniquing scheme. (This may blow up if the user + // writes a "__orc_anon[[:digit:]]* method). 
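Backing up to the start of this hunk: createIRTypedAddress, createImplPointer, and makeStub are the indirection primitives the rest of Orc's lazy machinery builds on. A sketch of how they compose under the signatures shown above; buildLazyStub and ResolverAddr are illustrative names, and F is assumed to be a declaration:

// Sketch: give F a body that tail-calls through a JIT-updatable pointer,
// initially aimed at ResolverAddr (e.g. a compile-callback trampoline).
void buildLazyStub(llvm::Function &F, llvm::Module &M,
                   llvm::orc::TargetAddress ResolverAddr) {
  using namespace llvm;
  Constant *Init =
      orc::createIRTypedAddress(*F.getFunctionType(), ResolverAddr);
  GlobalVariable *ImplPtr = orc::createImplPointer(
      *F.getType(), M, F.getName() + "$orc_impl", Init);
  orc::makeStub(F, *ImplPtr); // F now loads ImplPtr and tail-calls it.
}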
+ unsigned ID = Names.size(); + std::ostringstream NameStream; + NameStream << "__orc_anon" << ID++; + auto I = Names.insert(std::make_pair(&Orig, NameStream.str())); + return I.first->second; + } +private: + DenseMap<const Value*, std::string> Names; +}; + void partition(Module &M, const ModulePartitionMap &PMap) { + GlobalRenamer Renamer; + for (auto &KVPair : PMap) { auto ExtractGlobalVars = @@ -54,20 +95,26 @@ void partition(Module &M, const ModulePartitionMap &PMap) { if (KVPair.second.count(&Orig)) { copyGVInitializer(New, Orig, VMap); } - if (New.getLinkage() == GlobalValue::PrivateLinkage) { + if (New.hasLocalLinkage()) { + if (Renamer.needsRenaming(New)) + New.setName(Renamer.getRename(Orig)); New.setLinkage(GlobalValue::ExternalLinkage); New.setVisibility(GlobalValue::HiddenVisibility); } + assert(!Renamer.needsRenaming(New) && "Invalid global name."); }; auto ExtractFunctions = [&](Function &New, const Function &Orig, ValueToValueMapTy &VMap) { if (KVPair.second.count(&Orig)) copyFunctionBody(New, Orig, VMap); - if (New.getLinkage() == GlobalValue::InternalLinkage) { + if (New.hasLocalLinkage()) { + if (Renamer.needsRenaming(New)) + New.setName(Renamer.getRename(Orig)); New.setLinkage(GlobalValue::ExternalLinkage); New.setVisibility(GlobalValue::HiddenVisibility); } + assert(!Renamer.needsRenaming(New) && "Invalid function name."); }; CloneSubModule(*KVPair.first, M, ExtractGlobalVars, ExtractFunctions, diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 00e39bb..4023344 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -26,15 +26,21 @@ namespace orc { class OrcMCJITReplacement : public ExecutionEngine { - class ForwardingRTDyldMM : public RTDyldMemoryManager { + // OrcMCJITReplacement needs to do a little extra book-keeping to ensure that + // Orc's automatic finalization doesn't kick in earlier than MCJIT clients are + // expecting - see finalizeMemory. 
+ class MCJITReplacementMemMgr : public MCJITMemoryManager { public: - ForwardingRTDyldMM(OrcMCJITReplacement &M) : M(M) {} + MCJITReplacementMemMgr(OrcMCJITReplacement &M, + std::shared_ptr<MCJITMemoryManager> ClientMM) + : M(M), ClientMM(std::move(ClientMM)) {} uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName) override { uint8_t *Addr = - M.MM->allocateCodeSection(Size, Alignment, SectionID, SectionName); + ClientMM->allocateCodeSection(Size, Alignment, SectionID, + SectionName); M.SectionsAllocatedSinceLastLoad.insert(Addr); return Addr; } @@ -42,43 +48,35 @@ class OrcMCJITReplacement : public ExecutionEngine { uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID, StringRef SectionName, bool IsReadOnly) override { - uint8_t *Addr = M.MM->allocateDataSection(Size, Alignment, SectionID, - SectionName, IsReadOnly); + uint8_t *Addr = ClientMM->allocateDataSection(Size, Alignment, SectionID, + SectionName, IsReadOnly); M.SectionsAllocatedSinceLastLoad.insert(Addr); return Addr; } void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) override { - return M.MM->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); + return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO, + DataSizeRW); } bool needsToReserveAllocationSpace() override { - return M.MM->needsToReserveAllocationSpace(); + return ClientMM->needsToReserveAllocationSpace(); } void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - return M.MM->registerEHFrames(Addr, LoadAddr, Size); + return ClientMM->registerEHFrames(Addr, LoadAddr, Size); } void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - return M.MM->deregisterEHFrames(Addr, LoadAddr, Size); - } - - uint64_t getSymbolAddress(const std::string &Name) override { - return M.getSymbolAddressWithoutMangling(Name); - } - - void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true) override { - return M.MM->getPointerToNamedFunction(Name, AbortOnFailure); + return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); } void notifyObjectLoaded(ExecutionEngine *EE, const object::ObjectFile &O) override { - return M.MM->notifyObjectLoaded(EE, O); + return ClientMM->notifyObjectLoaded(EE, O); } bool finalizeMemory(std::string *ErrMsg = nullptr) override { @@ -96,21 +94,41 @@ class OrcMCJITReplacement : public ExecutionEngine { // get more than one set of objects loaded but not yet finalized is if // they were loaded during relocation of another set. 
if (M.UnfinalizedSections.size() == 1) - return M.MM->finalizeMemory(ErrMsg); + return ClientMM->finalizeMemory(ErrMsg); return false; } private: OrcMCJITReplacement &M; + std::shared_ptr<MCJITMemoryManager> ClientMM; + }; + + class LinkingResolver : public RuntimeDyld::SymbolResolver { + public: + LinkingResolver(OrcMCJITReplacement &M) : M(M) {} + + RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override { + return M.findMangledSymbol(Name); + } + + RuntimeDyld::SymbolInfo + findSymbolInLogicalDylib(const std::string &Name) override { + return M.ClientResolver->findSymbolInLogicalDylib(Name); + } + + private: + OrcMCJITReplacement &M; }; private: static ExecutionEngine * createOrcMCJITReplacement(std::string *ErrorMsg, - std::unique_ptr<RTDyldMemoryManager> OrcJMM, + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver, std::unique_ptr<TargetMachine> TM) { - return new OrcMCJITReplacement(std::move(OrcJMM), std::move(TM)); + return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver), + std::move(TM)); } public: @@ -118,12 +136,15 @@ public: OrcMCJITReplacementCtor = createOrcMCJITReplacement; } - OrcMCJITReplacement(std::unique_ptr<RTDyldMemoryManager> MM, - std::unique_ptr<TargetMachine> TM) - : TM(std::move(TM)), MM(std::move(MM)), Mang(this->TM->getDataLayout()), + OrcMCJITReplacement( + std::shared_ptr<MCJITMemoryManager> MemMgr, + std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver, + std::unique_ptr<TargetMachine> TM) + : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)), + Resolver(*this), ClientResolver(std::move(ClientResolver)), + Mang(this->TM->getDataLayout()), NotifyObjectLoaded(*this), NotifyFinalized(*this), - ObjectLayer(ObjectLayerT::CreateRTDyldMMFtor(), NotifyObjectLoaded, - NotifyFinalized), + ObjectLayer(NotifyObjectLoaded, NotifyFinalized), CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)), LazyEmitLayer(CompileLayer) { setDataLayout(this->TM->getDataLayout()); @@ -139,15 +160,13 @@ public: Modules.push_back(std::move(M)); std::vector<Module *> Ms; Ms.push_back(&*Modules.back()); - LazyEmitLayer.addModuleSet(std::move(Ms), - llvm::make_unique<ForwardingRTDyldMM>(*this)); + LazyEmitLayer.addModuleSet(std::move(Ms), &MemMgr, &Resolver); } void addObjectFile(std::unique_ptr<object::ObjectFile> O) override { std::vector<std::unique_ptr<object::ObjectFile>> Objs; Objs.push_back(std::move(O)); - ObjectLayer.addObjectSet(std::move(Objs), - llvm::make_unique<ForwardingRTDyldMM>(*this)); + ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); } void addObjectFile(object::OwningBinary<object::ObjectFile> O) override { @@ -157,8 +176,7 @@ public: std::vector<std::unique_ptr<object::ObjectFile>> Objs; Objs.push_back(std::move(Obj)); auto H = - ObjectLayer.addObjectSet(std::move(Objs), - llvm::make_unique<ForwardingRTDyldMM>(*this)); + ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver); std::vector<std::unique_ptr<MemoryBuffer>> Bufs; Bufs.push_back(std::move(Buf)); @@ -170,7 +188,11 @@ public: } uint64_t getSymbolAddress(StringRef Name) { - return getSymbolAddressWithoutMangling(Mangle(Name)); + return findSymbol(Name).getAddress(); + } + + RuntimeDyld::SymbolInfo findSymbol(StringRef Name) { + return findMangledSymbol(Mangle(Name)); } void finalizeObject() override { @@ -214,18 +236,19 @@ public: } private: - uint64_t getSymbolAddressWithoutMangling(StringRef Name) { - if (uint64_t Addr = LazyEmitLayer.findSymbol(Name, false).getAddress()) - return Addr; - if (uint64_t Addr 
= MM->getSymbolAddress(Name)) - return Addr; - if (uint64_t Addr = scanArchives(Name)) - return Addr; - return 0; + RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) { + if (auto Sym = LazyEmitLayer.findSymbol(Name, false)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + if (auto Sym = ClientResolver->findSymbol(Name)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + if (auto Sym = scanArchives(Name)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + + return nullptr; } - uint64_t scanArchives(StringRef Name) { + JITSymbol scanArchives(StringRef Name) { for (object::OwningBinary<object::Archive> &OB : Archives) { object::Archive *A = OB.getBinary(); // Look for our symbols in each Archive @@ -241,14 +264,13 @@ private: std::vector<std::unique_ptr<object::ObjectFile>> ObjSet; ObjSet.push_back(std::unique_ptr<object::ObjectFile>( static_cast<object::ObjectFile *>(ChildBin.release()))); - ObjectLayer.addObjectSet( - std::move(ObjSet), llvm::make_unique<ForwardingRTDyldMM>(*this)); - if (uint64_t Addr = ObjectLayer.findSymbol(Name, true).getAddress()) - return Addr; + ObjectLayer.addObjectSet(std::move(ObjSet), &MemMgr, &Resolver); + if (auto Sym = ObjectLayer.findSymbol(Name, true)) + return Sym; } } } - return 0; + return nullptr; } class NotifyObjectLoadedT { @@ -267,7 +289,7 @@ private: assert(Objects.size() == Infos.size() && "Incorrect number of Infos for Objects."); for (unsigned I = 0; I < Objects.size(); ++I) - M.MM->notifyObjectLoaded(&M, *Objects[I]); + M.MemMgr.notifyObjectLoaded(&M, *Objects[I]); }; private: @@ -299,7 +321,9 @@ private: typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT; std::unique_ptr<TargetMachine> TM; - std::unique_ptr<RTDyldMemoryManager> MM; + MCJITReplacementMemMgr MemMgr; + LinkingResolver Resolver; + std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver; Mangler Mang; NotifyObjectLoadedT NotifyObjectLoaded; diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp index 6fe5301..fc56e67 100644 --- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp +++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp @@ -6,39 +6,6 @@ using namespace llvm::orc; namespace { -std::array<const char *, 12> X86GPRsToSave = {{ - "rbp", "rbx", "r12", "r13", "r14", "r15", // Callee saved. - "rdi", "rsi", "rdx", "rcx", "r8", "r9", // Int args. 
-}}; - -std::array<const char *, 8> X86XMMsToSave = {{ - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" // FP args -}}; - -template <typename OStream> unsigned saveX86Regs(OStream &OS) { - for (const auto &GPR : X86GPRsToSave) - OS << " pushq %" << GPR << "\n"; - - OS << " subq $" << (16 * X86XMMsToSave.size()) << ", %rsp\n"; - - for (unsigned i = 0; i < X86XMMsToSave.size(); ++i) - OS << " movdqu %" << X86XMMsToSave[i] << ", " - << (16 * (X86XMMsToSave.size() - i - 1)) << "(%rsp)\n"; - - return (8 * X86GPRsToSave.size()) + (16 * X86XMMsToSave.size()); -} - -template <typename OStream> void restoreX86Regs(OStream &OS) { - for (unsigned i = 0; i < X86XMMsToSave.size(); ++i) - OS << " movdqu " << (16 * i) << "(%rsp), %" - << X86XMMsToSave[(X86XMMsToSave.size() - i - 1)] << "\n"; - OS << " addq $" << (16 * X86XMMsToSave.size()) << ", %rsp\n"; - - for (unsigned i = 0; i < X86GPRsToSave.size(); ++i) - OS << " popq %" << X86GPRsToSave[X86GPRsToSave.size() - i - 1] << "\n"; -} - -template <typename TargetT> uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM, TargetAddress CallbackID) { return JCBM->executeCompileCallback(CallbackID); @@ -53,14 +20,28 @@ const char* OrcX86_64::ResolverBlockName = "orc_resolver_block"; void OrcX86_64::insertResolverBlock( Module &M, JITCompileCallbackManagerBase &JCBM) { + + // Trampoline code-sequence length, used to get trampoline address from return + // address. const unsigned X86_64_TrampolineLength = 6; - auto CallbackPtr = executeCompileCallback<OrcX86_64>; + + // List of x86-64 GPRs to save. Note - RBP saved separately below. + std::array<const char *, 14> GPRs = {{ + "rax", "rbx", "rcx", "rdx", + "rsi", "rdi", "r8", "r9", + "r10", "r11", "r12", "r13", + "r14", "r15" + }}; + + // Address of the executeCompileCallback function. uint64_t CallbackAddr = - static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CallbackPtr)); + static_cast<uint64_t>( + reinterpret_cast<uintptr_t>(executeCompileCallback)); std::ostringstream AsmStream; Triple TT(M.getTargetTriple()); + // Switch to text section. if (TT.getOS() == Triple::Darwin) AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" << ".align 4, 0x90\n"; @@ -68,24 +49,51 @@ void OrcX86_64::insertResolverBlock( AsmStream << ".text\n" << ".align 16, 0x90\n"; + // Bake in a pointer to the callback manager immediately before the + // start of the resolver function. AsmStream << "jit_callback_manager_addr:\n" - << " .quad " << &JCBM << "\n" - << ResolverBlockName << ":\n"; - - uint64_t ReturnAddrOffset = saveX86Regs(AsmStream); - - // Compute index, load object address, and call JIT. - AsmStream << " leaq jit_callback_manager_addr(%rip), %rdi\n" + << " .quad " << &JCBM << "\n"; + + // Start the resolver function. + AsmStream << ResolverBlockName << ":\n" + << " pushq %rbp\n" + << " movq %rsp, %rbp\n"; + + // Store the GPRs. + for (const auto &GPR : GPRs) + AsmStream << " pushq %" << GPR << "\n"; + + // Store floating-point state with FXSAVE. + // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd + // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add + // an extra 64 bits of padding to the FXSave area. + unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0; + unsigned FXSaveSize = 512 + Padding; + AsmStream << " subq $" << FXSaveSize << ", %rsp\n" + << " fxsave (%rsp)\n" + + // Load callback manager address, compute trampoline address, call JIT. 
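+  // (The lines below rely on the frame set up above: after "pushq %rbp;
+  //  movq %rsp, %rbp", the trampoline's return address sits at 0x8(%rbp).
+  //  Subtracting X86_64_TrampolineLength converts it into the trampoline's
+  //  own address, which is the CallbackID handed to executeCompileCallback
+  //  in %rsi.)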
+ << " lea jit_callback_manager_addr(%rip), %rdi\n" << " movq (%rdi), %rdi\n" - << " movq " << ReturnAddrOffset << "(%rsp), %rsi\n" + << " movq 0x8(%rbp), %rsi\n" << " subq $" << X86_64_TrampolineLength << ", %rsi\n" << " movabsq $" << CallbackAddr << ", %rax\n" << " callq *%rax\n" - << " movq %rax, " << ReturnAddrOffset << "(%rsp)\n"; - restoreX86Regs(AsmStream); + // Replace the return to the trampoline with the return address of the + // compiled function body. + << " movq %rax, 0x8(%rbp)\n" + + // Restore the floating point state. + << " fxrstor (%rsp)\n" + << " addq $" << FXSaveSize << ", %rsp\n"; + + for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend())) + AsmStream << " popq %" << GPR << "\n"; - AsmStream << " retq\n"; + // Restore original RBP and return to compiled function body. + AsmStream << " popq %rbp\n" + << " retq\n"; M.appendModuleInlineAsm(AsmStream.str()); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a0ed7cf..a13ecb7 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -57,7 +57,8 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) { unsigned BytesRemaining = S.Size; if (StartPadding) { - dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr & ~(ColsPerRow - 1)) << ":"; + dbgs() << "\n" << format("0x%016" PRIx64, + LoadAddr & ~(uint64_t)(ColsPerRow - 1)) << ":"; while (StartPadding--) dbgs() << " "; } @@ -92,7 +93,7 @@ void RuntimeDyldImpl::resolveRelocations() { // entry provides the section to which the relocation will be applied. uint64_t Addr = Sections[i].LoadAddress; DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t" - << format("0x%x", Addr) << "\n"); + << format("%p", (uintptr_t)Addr) << "\n"); DEBUG(dumpSectionMemory(Sections[i], "before relocations")); resolveRelocationList(Relocations[i], Addr); DEBUG(dumpSectionMemory(Sections[i], "after relocations")); @@ -151,10 +152,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Compute the memory size required to load all sections to be loaded // and pass this information to the memory manager - if (MemMgr->needsToReserveAllocationSpace()) { + if (MemMgr.needsToReserveAllocationSpace()) { uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0; computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW); - MemMgr->reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); + MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW); } // Used sections from the object file @@ -360,19 +361,20 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, if (Name == ".eh_frame") SectionSize += 4; - if (SectionSize > 0) { - // save the total size of the section - if (IsCode) { - CodeSectionSizes.push_back(SectionSize); - } else if (IsReadOnly) { - ROSectionSizes.push_back(SectionSize); - } else { - RWSectionSizes.push_back(SectionSize); - } - // update the max alignment - if (Alignment > MaxAlignment) { - MaxAlignment = Alignment; - } + if (!SectionSize) + SectionSize = 1; + + if (IsCode) { + CodeSectionSizes.push_back(SectionSize); + } else if (IsReadOnly) { + ROSectionSizes.push_back(SectionSize); + } else { + RWSectionSizes.push_back(SectionSize); + } + + // update the max alignment + if (Alignment > MaxAlignment) { + MaxAlignment = Alignment; } } } @@ -485,7 +487,7 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Skip common symbols already elsewhere. 
if (GlobalSymbolTable.count(Name) || - MemMgr->getSymbolAddressInLogicalDylib(Name)) { + Resolver.findSymbolInLogicalDylib(Name)) { DEBUG(dbgs() << "\tSkipping already emitted common symbol '" << Name << "'\n"); continue; @@ -502,8 +504,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, // Allocate memory for the section unsigned SectionID = Sections.size(); - uint8_t *Addr = MemMgr->allocateDataSection(CommonSize, sizeof(void *), - SectionID, StringRef(), false); + uint8_t *Addr = MemMgr.allocateDataSection(CommonSize, sizeof(void *), + SectionID, StringRef(), false); if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; @@ -577,10 +579,12 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, if (IsRequired) { Check(Section.getContents(data)); Allocate = DataSize + PaddingSize + StubBufSize; - Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID, - Name) - : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, - Name, IsReadOnly); + if (!Allocate) + Allocate = 1; + Addr = IsCode ? MemMgr.allocateCodeSection(Allocate, Alignment, SectionID, + Name) + : MemMgr.allocateDataSection(Allocate, Alignment, SectionID, + Name, IsReadOnly); if (!Addr) report_fatal_error("Unable to allocate section memory!"); @@ -787,9 +791,9 @@ void RuntimeDyldImpl::resolveExternalSymbols() { uint64_t Addr = 0; RTDyldSymbolTable::const_iterator Loc = GlobalSymbolTable.find(Name); if (Loc == GlobalSymbolTable.end()) { - // This is an external symbol, try to get its address from - // MemoryManager. - Addr = MemMgr->getSymbolAddress(Name.data()); + // This is an external symbol, try to get its address from the symbol + // resolver. + Addr = Resolver.findSymbol(Name.data()).getAddress(); // The call to getSymbolAddress may have caused additional modules to // be loaded, which may have added new entries to the // ExternalSymbolRelocations map. Consequently, we need to update our @@ -810,7 +814,6 @@ void RuntimeDyldImpl::resolveExternalSymbols() { report_fatal_error("Program used external function '" + Name + "' which could not be resolved!"); - updateGOTEntries(Name, Addr); DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t" << format("0x%lx", Addr) << "\n"); // This list may have been updated when we called getSymbolAddress, so @@ -835,7 +838,12 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress( return 0; } -RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { +void RuntimeDyld::MemoryManager::anchor() {} +void RuntimeDyld::SymbolResolver::anchor() {} + +RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : MemMgr(MemMgr), Resolver(Resolver) { // FIXME: There's a potential issue lurking here if a single instance of // RuntimeDyld is used to load multiple objects. The current implementation // associates a single memory manager with a RuntimeDyld instance. Even @@ -843,7 +851,6 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { // they share a single memory manager. This can become a problem when page // permissions are applied. 
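The constructor hunk above is the pivot of this patch: RuntimeDyld now borrows a RuntimeDyld::MemoryManager and a RuntimeDyld::SymbolResolver instead of owning an RTDyldMemoryManager. Client code then takes roughly this shape; the sketch assumes RTDyldMemoryManager (and hence SectionMemoryManager) is reparented onto both new interfaces, as the RTDyldMemoryManager.h include added to RuntimeDyldImpl.h below suggests:

// Sketch: link one in-memory object via the two-reference interface.
void linkObject(const llvm::object::ObjectFile &Obj) {
  llvm::SectionMemoryManager MM; // memory manager *and* symbol resolver
  llvm::RuntimeDyld Dyld(MM, MM);
  Dyld.loadObject(Obj);
  Dyld.resolveRelocations();
  std::string Err;
  MM.finalizeMemory(&Err); // apply final page permissions
}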
Dyld = nullptr; - MM = mm; ProcessAllSections = false; Checker = nullptr; } @@ -851,27 +858,33 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { RuntimeDyld::~RuntimeDyld() {} static std::unique_ptr<RuntimeDyldCOFF> -createRuntimeDyldCOFF(Triple::ArchType Arch, RTDyldMemoryManager *MM, +createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver, bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { - std::unique_ptr<RuntimeDyldCOFF> Dyld(RuntimeDyldCOFF::create(Arch, MM)); + std::unique_ptr<RuntimeDyldCOFF> Dyld = + RuntimeDyldCOFF::create(Arch, MM, Resolver); Dyld->setProcessAllSections(ProcessAllSections); Dyld->setRuntimeDyldChecker(Checker); return Dyld; } static std::unique_ptr<RuntimeDyldELF> -createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections, - RuntimeDyldCheckerImpl *Checker) { - std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM)); +createRuntimeDyldELF(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver, + bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { + std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM, Resolver)); Dyld->setProcessAllSections(ProcessAllSections); Dyld->setRuntimeDyldChecker(Checker); return Dyld; } static std::unique_ptr<RuntimeDyldMachO> -createRuntimeDyldMachO(Triple::ArchType Arch, RTDyldMemoryManager *MM, - bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) { - std::unique_ptr<RuntimeDyldMachO> Dyld(RuntimeDyldMachO::create(Arch, MM)); +createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver, + bool ProcessAllSections, + RuntimeDyldCheckerImpl *Checker) { + std::unique_ptr<RuntimeDyldMachO> Dyld = + RuntimeDyldMachO::create(Arch, MM, Resolver); Dyld->setProcessAllSections(ProcessAllSections); Dyld->setRuntimeDyldChecker(Checker); return Dyld; @@ -881,14 +894,14 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo> RuntimeDyld::loadObject(const ObjectFile &Obj) { if (!Dyld) { if (Obj.isELF()) - Dyld = createRuntimeDyldELF(MM, ProcessAllSections, Checker); + Dyld = createRuntimeDyldELF(MemMgr, Resolver, ProcessAllSections, Checker); else if (Obj.isMachO()) Dyld = createRuntimeDyldMachO( - static_cast<Triple::ArchType>(Obj.getArch()), MM, + static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver, ProcessAllSections, Checker); else if (Obj.isCOFF()) Dyld = createRuntimeDyldCOFF( - static_cast<Triple::ArchType>(Obj.getArch()), MM, + static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver, ProcessAllSections, Checker); else report_fatal_error("Incompatible object format!"); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 56bcb8e..8055d55 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -40,13 +40,15 @@ public: namespace llvm { std::unique_ptr<RuntimeDyldCOFF> -llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) { +llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) { switch (Arch) { default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); break; case Triple::x86_64: - return make_unique<RuntimeDyldCOFFX86_64>(MM); + return make_unique<RuntimeDyldCOFFX86_64>(MemMgr, Resolver); } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h 
b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h index 681a3e5..32b8fa2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h @@ -31,11 +31,15 @@ public: std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &Obj) override; bool isCompatibleFile(const object::ObjectFile &Obj) const override; - static std::unique_ptr<RuntimeDyldCOFF> create(Triple::ArchType Arch, - RTDyldMemoryManager *MM); + + static std::unique_ptr<RuntimeDyldCOFF> + create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver); protected: - RuntimeDyldCOFF(RTDyldMemoryManager *MM) : RuntimeDyldImpl(MM) {} + RuntimeDyldCOFF(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldImpl(MemMgr, Resolver) {} uint64_t getSymbolOffset(const SymbolRef &Sym); }; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index c991408..957571b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -738,7 +738,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) return InternalSymbol.getAddress(); - return getRTDyld().MemMgr->getSymbolAddress(Symbol); + return getRTDyld().Resolver.findSymbol(Symbol).getAddress(); } uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h index e8d299a..69d2a7d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h @@ -19,6 +19,7 @@ class RuntimeDyldCheckerImpl { friend class RuntimeDyldChecker; friend class RuntimeDyldImpl; friend class RuntimeDyldCheckerExprEval; + friend class RuntimeDyldELF; public: RuntimeDyldCheckerImpl(RuntimeDyld &RTDyld, MCDisassembler *Disassembler, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 6278170..bbffdfb 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldELF.h" +#include "RuntimeDyldCheckerImpl.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -183,32 +184,30 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const { namespace llvm { -RuntimeDyldELF::RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} +RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {} RuntimeDyldELF::~RuntimeDyldELF() {} void RuntimeDyldELF::registerEHFrames() { - if (!MemMgr) - return; for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = UnregisteredEHFrameSections[i]; uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; size_t EHFrameSize = Sections[EHFrameSID].Size; - MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); 
+ MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } UnregisteredEHFrameSections.clear(); } void RuntimeDyldELF::deregisterEHFrames() { - if (!MemMgr) - return; for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = RegisteredEHFrameSections[i]; uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; size_t EHFrameSize = Sections[EHFrameSID].Size; - MemMgr->deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); } RegisteredEHFrameSections.clear(); } @@ -247,27 +246,16 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, << format("%p\n", Section.Address + Offset)); break; } - case ELF::R_X86_64_GOTPCREL: { - // findGOTEntry returns the 'G + GOT' part of the relocation calculation - // based on the load/target address of the GOT (not the current/local addr). - uint64_t GOTAddr = findGOTEntry(Value, SymOffset); - uint64_t FinalAddress = Section.LoadAddress + Offset; - // The processRelocationRef method combines the symbol offset and the addend - // and in most cases that's what we want. For this relocation type, we need - // the raw addend, so we subtract the symbol offset to get it. - int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress; - assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); - int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; - break; - } case ELF::R_X86_64_PC32: { // Get the placeholder value from the generated object since // a previous relocation attempt may have overwritten the loaded version support::ulittle32_t::ref Placeholder( (void *)(Section.ObjAddress + Offset)); uint64_t FinalAddress = Section.LoadAddress + Offset; - int64_t RealOffset = Placeholder + Value + Addend - FinalAddress; + int64_t RealOffset = Value + Addend - FinalAddress; + // Don't add the placeholder if this is a stub + if (Offset < Section.Size) + RealOffset += Placeholder; assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; @@ -279,8 +267,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, support::ulittle64_t::ref Placeholder( (void *)(Section.ObjAddress + Offset)); uint64_t FinalAddress = Section.LoadAddress + Offset; - support::ulittle64_t::ref(Section.Address + Offset) = - Placeholder + Value + Addend - FinalAddress; + int64_t RealOffset = Value + Addend - FinalAddress; + if (Offset < Section.Size) + RealOffset += Placeholder; + support::ulittle64_t::ref(Section.Address + Offset) = RealOffset; break; } } @@ -1325,16 +1315,18 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( Stubs[Value] = StubOffset; createStubFunction((uint8_t *)StubAddress); - // Create a GOT entry for the external function. - GOTEntries.push_back(Value); - - // Make our stub function a relative call to the GOT entry. 
- RelocationEntry RE(SectionID, StubOffset + 2, ELF::R_X86_64_GOTPCREL, - -4); - addRelocationForSymbol(RE, Value.SymbolName); - // Bump our stub offset counter Section.StubOffset = StubOffset + getMaxStubSize(); + + // Allocate a GOT Entry + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + + // The load of the GOT address has an addend of -4 + resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); + + // Fill in the value of the symbol we're targeting into the GOT + addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64), + Value.SymbolName); } // Make the target call a call into the stub table. @@ -1345,10 +1337,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( Value.Offset); addRelocationForSection(RE, Value.SectionID); } + } else if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) { + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend); + + // Fill in the value of the symbol we're targeting into the GOT + RelocationEntry RE = computeGOTOffsetRE(SectionID, GOTOffset, Value.Offset, ELF::R_X86_64_64); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); } else { - if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) { - GOTEntries.push_back(Value); - } RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); @@ -1358,22 +1357,6 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( return ++RelI; } -void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) { - - SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator it; - SmallVectorImpl<std::pair<SID, GOTRelocations>>::iterator end = GOTs.end(); - - for (it = GOTs.begin(); it != end; ++it) { - GOTRelocations &GOTEntries = it->second; - for (int i = 0, e = GOTEntries.size(); i != e; ++i) { - if (GOTEntries[i].SymbolName != nullptr && - GOTEntries[i].SymbolName == Name) { - GOTEntries[i].Offset = Addr; - } - } - } -} - size_t RuntimeDyldELF::getGOTEntrySize() { // We don't use the GOT in all of these cases, but it's essentially free // to put them all here. @@ -1400,83 +1383,53 @@ size_t RuntimeDyldELF::getGOTEntrySize() { return Result; } -uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) { - - const size_t GOTEntrySize = getGOTEntrySize(); - - SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator it; - SmallVectorImpl<std::pair<SID, GOTRelocations>>::const_iterator end = - GOTs.end(); - - int GOTIndex = -1; - for (it = GOTs.begin(); it != end; ++it) { - SID GOTSectionID = it->first; - const GOTRelocations &GOTEntries = it->second; - - // Find the matching entry in our vector. - uint64_t SymbolOffset = 0; - for (int i = 0, e = GOTEntries.size(); i != e; ++i) { - if (!GOTEntries[i].SymbolName) { - if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress && - GOTEntries[i].Offset == Offset) { - GOTIndex = i; - SymbolOffset = GOTEntries[i].Offset; - break; - } - } else { - // GOT entries for external symbols use the addend as the address when - // the external symbol has been resolved. - if (GOTEntries[i].Offset == LoadAddress) { - GOTIndex = i; - // Don't use the Addend here. The relocation handler will use it. 
- break; - } - } - } - - if (GOTIndex != -1) { - if (GOTEntrySize == sizeof(uint64_t)) { - uint64_t *LocalGOTAddr = (uint64_t *)getSectionAddress(GOTSectionID); - // Fill in this entry with the address of the symbol being referenced. - LocalGOTAddr[GOTIndex] = LoadAddress + SymbolOffset; - } else { - uint32_t *LocalGOTAddr = (uint32_t *)getSectionAddress(GOTSectionID); - // Fill in this entry with the address of the symbol being referenced. - LocalGOTAddr[GOTIndex] = (uint32_t)(LoadAddress + SymbolOffset); - } - - // Calculate the load address of this entry - return getSectionLoadAddress(GOTSectionID) + (GOTIndex * GOTEntrySize); - } +uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no) +{ + (void)SectionID; // The GOT Section is the same for all sections in the object file + if (GOTSectionID == 0) { + GOTSectionID = Sections.size(); + // Reserve a section id. We'll allocate the section later + // once we know the total size + Sections.push_back(SectionEntry(".got", 0, 0, 0)); } + uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize(); + CurrentGOTIndex += no; + return StartOffset; +} - assert(GOTIndex != -1 && "Unable to find requested GOT entry."); - return 0; +void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, uint64_t GOTOffset) +{ + // Fill in the relative address of the GOT Entry into the stub + RelocationEntry GOTRE(SectionID, Offset, ELF::R_X86_64_PC32, GOTOffset); + addRelocationForSection(GOTRE, GOTSectionID); +} + +RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t GOTOffset, uint64_t SymbolOffset, + uint32_t Type) +{ + (void)SectionID; // The GOT Section is the same for all sections in the object file + return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset); } void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { // If necessary, allocate the global offset table - if (MemMgr) { - // Allocate the GOT if necessary - size_t numGOTEntries = GOTEntries.size(); - if (numGOTEntries != 0) { - // Allocate memory for the section - unsigned SectionID = Sections.size(); - size_t TotalSize = numGOTEntries * getGOTEntrySize(); - uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, getGOTEntrySize(), - SectionID, ".got", false); - if (!Addr) - report_fatal_error("Unable to allocate memory for GOT!"); - - GOTs.push_back(std::make_pair(SectionID, GOTEntries)); - Sections.push_back(SectionEntry(".got", Addr, TotalSize, 0)); - // For now, initialize all GOT entries to zero. We'll fill them in as - // needed when GOT-based relocations are applied. - memset(Addr, 0, TotalSize); - } - } else { - report_fatal_error("Unable to allocate memory for GOT!"); + if (GOTSectionID != 0) { + // Allocate memory for the section + size_t TotalSize = CurrentGOTIndex * getGOTEntrySize(); + uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(), + GOTSectionID, ".got", false); + if (!Addr) + report_fatal_error("Unable to allocate memory for GOT!"); + + Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0); + + if (Checker) + Checker->registerSection(Obj.getFileName(), GOTSectionID); + + // For now, initialize all GOT entries to zero. We'll fill them in as + // needed when GOT-based relocations are applied. + memset(Addr, 0, TotalSize); } // Look for and record the EH frame section. 
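Pulled together, the rewritten GOT support is a three-step protocol per GOT-using reference; this is a paraphrase of the hunks above, not new API:

// 1. Reserve a slot; the shared .got section is created lazily on first use.
uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
// 2. Patch the referencing site with a PC32 relocation against the slot.
resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend);
// 3. Queue an R_X86_64_64 into the slot itself, so the target's final
//    address lands there during symbol resolution; finalizeLoad() later
//    allocates all CurrentGOTIndex slots in one shot and zero-fills them.
addRelocationForSymbol(
    computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64),
    Value.SymbolName);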
@@ -1490,6 +1443,9 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, break; } } + + GOTSectionID = 0; + CurrentGOTIndex = 0; } bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 71260d0..590d26a 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -80,16 +80,32 @@ class RuntimeDyldELF : public RuntimeDyldImpl { ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel); - uint64_t findGOTEntry(uint64_t LoadAddr, uint64_t Offset); size_t getGOTEntrySize(); - void updateGOTEntries(StringRef Name, uint64_t Addr) override; + SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; } - // Relocation entries for symbols whose position-independent offset is - // updated in a global offset table. - typedef SmallVector<RelocationValueRef, 2> GOTRelocations; - GOTRelocations GOTEntries; // List of entries requiring finalization. - SmallVector<std::pair<SID, GOTRelocations>, 8> GOTs; // Allocated tables. + // Allocate 'no' GOT entries for use in the given section. + uint64_t allocateGOTEntries(unsigned SectionID, unsigned no); + + // Resolve the relative address of GOTOffset in Section ID and place + // it at the given Offset. + void resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, + uint64_t GOTOffset); + + // For a GOT entry referenced from SectionID, compute a relocation entry + // that will place the final resolved value in the GOT slot + RelocationEntry computeGOTOffsetRE(unsigned SectionID, + uint64_t GOTOffset, + uint64_t SymbolOffset, + unsigned Type); + + // The tentative ID for the GOT section + unsigned GOTSectionID; + + // Records the current number of allocated slots in the GOT + // (This would be equivalent to GOTEntries.size() were it not for relocations + // that consume more than one slot) + unsigned CurrentGOTIndex; // When a module is loaded we save the SectionID of the EH frame section // in a table until we receive a request to register all unregistered @@ -98,8 +114,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { SmallVector<SID, 2> RegisteredEHFrameSections; public: - RuntimeDyldELF(RTDyldMemoryManager *mm); - virtual ~RuntimeDyldELF(); + RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver); + ~RuntimeDyldELF() override; std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &O) override; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 05060dd..ee51a75 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "llvm/Object/ObjectFile.h" @@ -51,7 +52,7 @@ class Twine; class SectionEntry { public: /// Name - section name. - StringRef Name; + std::string Name; /// Address - address in the linker's memory where the section resides. uint8_t *Address; @@ -188,7 +189,10 @@ class RuntimeDyldImpl { friend class RuntimeDyldCheckerImpl; protected: // The MemoryManager to load objects into. 
- RTDyldMemoryManager *MemMgr; + RuntimeDyld::MemoryManager &MemMgr; + + // The symbol resolver to use for external symbols. + RuntimeDyld::SymbolResolver &Resolver; // Attached RuntimeDyldChecker instance. Null if no instance attached. RuntimeDyldCheckerImpl *Checker; @@ -357,10 +361,6 @@ protected: /// \brief Resolve relocations to external symbols. void resolveExternalSymbols(); - /// \brief Update GOT entries for external symbols. - // The base class does nothing. ELF overrides this. - virtual void updateGOTEntries(StringRef Name, uint64_t Addr) {} - // \brief Compute an upper bound of the memory that is required to load all // sections void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize, @@ -374,8 +374,10 @@ protected: std::pair<unsigned, unsigned> loadObjectImpl(const object::ObjectFile &Obj); public: - RuntimeDyldImpl(RTDyldMemoryManager *mm) - : MemMgr(mm), Checker(nullptr), ProcessAllSections(false), HasError(false) { + RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr), + ProcessAllSections(false), HasError(false) { } virtual ~RuntimeDyldImpl(); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 2d39662..675063c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -178,25 +178,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const { } template <typename Impl> -void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg, +void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID; unsigned TextSID = RTDYLD_INVALID_SECTION_ID; unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID; - ObjSectionToIDMap::iterator i, e; - for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { - const SectionRef &Section = i->first; + for (const auto &Section : Obj.sections()) { StringRef Name; Section.getName(Name); - if (Name == "__eh_frame") - EHFrameSID = i->second; - else if (Name == "__text") - TextSID = i->second; + + // Force emission of the __text, __eh_frame, and __gcc_except_tab sections + // if they're present. Otherwise call down to the impl to handle other + // sections that have already been emitted. 
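+    // (The findOrEmitSection calls below are idempotent: if loading already
+    //  emitted one of these sections, the existing SectionID is returned, so
+    //  forcing them here only emits sections that were previously skipped.)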
+ if (Name == "__text") + TextSID = findOrEmitSection(Obj, Section, true, SectionMap); + else if (Name == "__eh_frame") + EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap); else if (Name == "__gcc_except_tab") - ExceptTabSID = i->second; - else - impl().finalizeSection(ObjImg, i->second, Section); + ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap); + else { + auto I = SectionMap.find(Section); + if (I != SectionMap.end()) + impl().finalizeSection(Obj, I->second, Section); + } } UnregisteredEHFrameSections.push_back( EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID)); @@ -239,7 +244,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P, } static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { - int64_t ObjDistance = A->ObjAddress - B->ObjAddress; + int64_t ObjDistance = + static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress); int64_t MemDistance = A->LoadAddress - B->LoadAddress; return ObjDistance - MemDistance; } @@ -247,8 +253,6 @@ static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { template <typename Impl> void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() { - if (!MemMgr) - return; for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { EHFrameRelatedSections &SectionInfo = UnregisteredEHFrameSections[i]; if (SectionInfo.EHFrameSID == RTDYLD_INVALID_SECTION_ID || @@ -271,22 +275,28 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() { P = processFDE(P, DeltaForText, DeltaForEH); } while (P != End); - MemMgr->registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, - EHFrame->Size); + MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, + EHFrame->Size); } UnregisteredEHFrameSections.clear(); } std::unique_ptr<RuntimeDyldMachO> -RuntimeDyldMachO::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) { +RuntimeDyldMachO::create(Triple::ArchType Arch, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) { switch (Arch) { default: llvm_unreachable("Unsupported target for RuntimeDyldMachO."); break; - case Triple::arm: return make_unique<RuntimeDyldMachOARM>(MM); - case Triple::aarch64: return make_unique<RuntimeDyldMachOAArch64>(MM); - case Triple::x86: return make_unique<RuntimeDyldMachOI386>(MM); - case Triple::x86_64: return make_unique<RuntimeDyldMachOX86_64>(MM); + case Triple::arm: + return make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver); + case Triple::aarch64: + return make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver); + case Triple::x86: + return make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver); + case Triple::x86_64: + return make_unique<RuntimeDyldMachOX86_64>(MemMgr, Resolver); } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index f8bfc03..45a94ba 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -49,7 +49,9 @@ protected: // EH frame sections with the memory manager. SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections; - RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + RuntimeDyldMachO(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldImpl(MemMgr, Resolver) {} /// This convenience method uses memcpy to extract a contiguous addend (the /// addend size and offset are taken from the corresponding fields of the RE). 
@@ -114,8 +116,10 @@ protected: public: /// Create a RuntimeDyldMachO instance for the given target architecture. - static std::unique_ptr<RuntimeDyldMachO> create(Triple::ArchType Arch, - RTDyldMemoryManager *mm); + static std::unique_ptr<RuntimeDyldMachO> + create(Triple::ArchType Arch, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver); std::unique_ptr<RuntimeDyld::LoadedObjectInfo> loadObject(const object::ObjectFile &O) override; @@ -142,7 +146,9 @@ private: int64_t DeltaForEH); public: - RuntimeDyldMachOCRTPBase(RTDyldMemoryManager *mm) : RuntimeDyldMachO(mm) {} + RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachO(MemMgr, Resolver) {} void finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) override; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index ce2f4a2..cd534a1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -32,7 +32,9 @@ private: SmallVector<SID, 2> RegisteredEHFrameSections; public: - RuntimeDyldCOFFX86_64(RTDyldMemoryManager *MM) : RuntimeDyldCOFF(MM) {} + RuntimeDyldCOFFX86_64(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldCOFF(MM, Resolver) {} unsigned getMaxStubSize() override { return 6; // 2-byte jmp instruction + 32-bit relative address @@ -177,13 +179,11 @@ public: unsigned getStubAlignment() override { return 1; } void registerEHFrames() override { - if (!MemMgr) - return; for (auto const &EHFrameSID : UnregisteredEHFrameSections) { uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; size_t EHFrameSize = Sections[EHFrameSID].Size; - MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } UnregisteredEHFrameSections.clear(); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 196fa62..99fd6e3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -23,8 +23,9 @@ public: typedef uint64_t TargetPtrT; - RuntimeDyldMachOAArch64(RTDyldMemoryManager *MM) - : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOAArch64(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 8; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 09e430e..09e51f2 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -25,7 +25,9 @@ public: typedef uint32_t TargetPtrT; - RuntimeDyldMachOARM(RTDyldMemoryManager *MM) : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOARM(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 8; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h 
index 67d7027..053f90c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -22,8 +22,9 @@ public: typedef uint32_t TargetPtrT; - RuntimeDyldMachOI386(RTDyldMemoryManager *MM) - : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOI386(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 0; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index 0734017..4b3b01b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -22,8 +22,9 @@ public: typedef uint64_t TargetPtrT; - RuntimeDyldMachOX86_64(RTDyldMemoryManager *MM) - : RuntimeDyldMachOCRTPBase(MM) {} + RuntimeDyldMachOX86_64(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldMachOCRTPBase(MM, Resolver) {} unsigned getMaxStubSize() override { return 8; } diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index 81e51d1..bfd87ec 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -1,8 +1,10 @@ -# Disable the coverage instrumentation for the fuzzer itself. -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2 -fsanitize-coverage=0") -if( LLVM_USE_SANITIZE_COVERAGE ) +set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS_RELEASE}") +# Disable the coverage and sanitizer instrumentation for the fuzzer itself. +set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O2 -fno-sanitize=all") +if( LLVM_USE_SANITIZE_COVERAGE ) add_library(LLVMFuzzerNoMain OBJECT FuzzerCrossOver.cpp + FuzzerDFSan.cpp FuzzerDriver.cpp FuzzerIO.cpp FuzzerLoop.cpp diff --git a/lib/Fuzzer/FuzzerDFSan.cpp b/lib/Fuzzer/FuzzerDFSan.cpp new file mode 100644 index 0000000..16f8c0f --- /dev/null +++ b/lib/Fuzzer/FuzzerDFSan.cpp @@ -0,0 +1,275 @@ +//===- FuzzerDFSan.cpp - DFSan-based fuzzer mutator -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// DataFlowSanitizer (DFSan) is a tool for +// generalised dynamic data flow (taint) analysis: +// http://clang.llvm.org/docs/DataFlowSanitizer.html . +// +// This file implements a mutation algorithm based on taint +// analysis feedback from DFSan. +// +// The approach has some similarity to "Taint-based Directed Whitebox Fuzzing" +// by Vijay Ganesh & Tim Leek & Martin Rinard: +// http://dspace.mit.edu/openaccess-disseminate/1721.1/59320, +// but it uses a full blown LLVM IR taint analysis and separate instrumentation +// to analyze all of the "attack points" at once. +// +// Workflow: +// * lib/Fuzzer/Fuzzer*.cpp is compiled w/o any instrumentation. +// * The code under test is compiled with DFSan *and* with special extra hooks +// that are inserted before dfsan. Currently supported hooks: +// - __sanitizer_cov_trace_cmp: inserted before every ICMP instruction, +// receives the type, size and arguments of ICMP. +// * Every call to HOOK(a,b) is replaced by DFSan with +// __dfsw_HOOK(a, b, label(a), label(b)) so that __dfsw_HOOK +// gets all the taint labels for the arguments. 
+// * At the Fuzzer startup we assign a unique DFSan label +// to every byte of the input string (Fuzzer::CurrentUnit) so that for any +// chunk of data we know which input bytes it has derived from. +// * The __dfsw_* functions (implemented in this file) record the +// parameters (i.e. the application data and the corresponding taint labels) +// in a global state. +// * Fuzzer::MutateWithDFSan() tries to use the data recorded by __dfsw_* +// hooks to guide the fuzzing towards new application states. +// For example if 4 bytes of data that derive from input bytes {4,5,6,7} +// are compared with a constant 12345 and the comparison always yields +// the same result, we try to insert 12345, 12344, 12346 into bytes +// {4,5,6,7} of the next fuzzed inputs. +// +// This code does not function when DFSan is not linked in. +// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer +// we redeclare the dfsan_* interface functions as weak and check if they +// are nullptr before calling. +// If this approach proves to be useful we may add attribute(weak) to the +// dfsan declarations in dfsan_interface.h +// +// This module is in the "proof of concept" stage. +// It is capable of solving only the simplest puzzles +// like test/dfsan/DFSanSimpleCmpTest.cpp. +//===----------------------------------------------------------------------===// + +/* Example of manual usage: +( + cd $LLVM/lib/Fuzzer/ + clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp + clang++ -O0 -std=c++11 -fsanitize-coverage=3 \ + -mllvm -sanitizer-coverage-experimental-trace-compares=1 \ + -fsanitize=dataflow -fsanitize-blacklist=./dfsan_fuzzer_abi.list \ + test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o + ./a.out +) +*/ + +#include "FuzzerInternal.h" +#include <sanitizer/dfsan_interface.h> + +#include <cstring> +#include <iostream> +#include <unordered_map> + +extern "C" { +__attribute__((weak)) +dfsan_label dfsan_create_label(const char *desc, void *userdata); +__attribute__((weak)) +void dfsan_set_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +void dfsan_add_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); +} // extern "C" + +namespace { + +// These values are copied from include/llvm/IR/InstrTypes.h. +// We do not include the LLVM headers here to remain independent. +// If these values ever change, an assertion in ComputeCmp will fail. 
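+// (These mirror CmpInst::Predicate in InstrTypes.h, where the FCmp
+// predicates occupy the low values and the ICmp block begins at 32; only
+// the ICmp subset is needed here.)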
+enum Predicate { + ICMP_EQ = 32, ///< equal + ICMP_NE = 33, ///< not equal + ICMP_UGT = 34, ///< unsigned greater than + ICMP_UGE = 35, ///< unsigned greater or equal + ICMP_ULT = 36, ///< unsigned less than + ICMP_ULE = 37, ///< unsigned less or equal + ICMP_SGT = 38, ///< signed greater than + ICMP_SGE = 39, ///< signed greater or equal + ICMP_SLT = 40, ///< signed less than + ICMP_SLE = 41, ///< signed less or equal +}; + +template <class U, class S> +bool ComputeCmp(size_t CmpType, U Arg1, U Arg2) { + switch(CmpType) { + case ICMP_EQ : return Arg1 == Arg2; + case ICMP_NE : return Arg1 != Arg2; + case ICMP_UGT: return Arg1 > Arg2; + case ICMP_UGE: return Arg1 >= Arg2; + case ICMP_ULT: return Arg1 < Arg2; + case ICMP_ULE: return Arg1 <= Arg2; + case ICMP_SGT: return (S)Arg1 > (S)Arg2; + case ICMP_SGE: return (S)Arg1 >= (S)Arg2; + case ICMP_SLT: return (S)Arg1 < (S)Arg2; + case ICMP_SLE: return (S)Arg1 <= (S)Arg2; + default: assert(0 && "unsupported CmpType"); + } + return false; +} + +static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1, + uint64_t Arg2) { + if (CmpSize == 8) return ComputeCmp<uint64_t, int64_t>(CmpType, Arg1, Arg2); + if (CmpSize == 4) return ComputeCmp<uint32_t, int32_t>(CmpType, Arg1, Arg2); + if (CmpSize == 2) return ComputeCmp<uint16_t, int16_t>(CmpType, Arg1, Arg2); + if (CmpSize == 1) return ComputeCmp<uint8_t, int8_t>(CmpType, Arg1, Arg2); + assert(0 && "unsupported type size"); + return true; +} + +// As a simplification we use the range of input bytes instead of a set of input +// bytes. +struct LabelRange { + uint16_t Beg, End; // Range is [Beg, End), thus Beg==End is an empty range. + + LabelRange(uint16_t Beg = 0, uint16_t End = 0) : Beg(Beg), End(End) {} + + static LabelRange Join(LabelRange LR1, LabelRange LR2) { + if (LR1.Beg == LR1.End) return LR2; + if (LR2.Beg == LR2.End) return LR1; + return {std::min(LR1.Beg, LR2.Beg), std::max(LR1.End, LR2.End)}; + } + LabelRange &Join(LabelRange LR) { + return *this = Join(*this, LR); + } + static LabelRange Singleton(const dfsan_label_info *LI) { + uint16_t Idx = (uint16_t)(uintptr_t)LI->userdata; + assert(Idx > 0); + return {(uint16_t)(Idx - 1), Idx}; + } +}; + +std::ostream &operator<<(std::ostream &os, const LabelRange &LR) { + return os << "[" << LR.Beg << "," << LR.End << ")"; +} + +class DFSanState { + public: + DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options) + : Options(Options) {} + + struct CmpSiteInfo { + size_t ResCounters[2] = {0, 0}; + size_t CmpSize = 0; + LabelRange LR; + std::unordered_map<uint64_t, size_t> CountedConstants; + }; + + LabelRange GetLabelRange(dfsan_label L); + void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2, dfsan_label L1, + dfsan_label L2); + bool Mutate(fuzzer::Unit *U); + + private: + std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap; + LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {}; + const fuzzer::Fuzzer::FuzzingOptions &Options; +}; + +LabelRange DFSanState::GetLabelRange(dfsan_label L) { + LabelRange &LR = LabelRanges[L]; + if (LR.Beg < LR.End || L == 0) + return LR; + const dfsan_label_info *LI = dfsan_get_label_info(L); + if (LI->l1 || LI->l2) + return LR = LabelRange::Join(GetLabelRange(LI->l1), GetLabelRange(LI->l2)); + return LR = LabelRange::Singleton(LI); +} + +void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2, dfsan_label L1, + dfsan_label L2) { + if (L1 == 0 && L2 == 0) + return; // Not actionable. 
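+  // (Editorial note: the branch below also bails out when *both* operands
+  // are tainted; such sites may still be solvable, but DFSanState::Mutate
+  // keys on exactly one constant operand per site, so only one-sided
+  // comparisons are recorded.)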
+ if (L1 != 0 && L2 != 0) + return; // Probably still actionable. + bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2); + CmpSiteInfo &CSI = PcToCmpSiteInfoMap[PC]; + CSI.CmpSize = CmpSize; + CSI.LR.Join(GetLabelRange(L1)).Join(GetLabelRange(L2)); + if (!L1) CSI.CountedConstants[Arg1]++; + if (!L2) CSI.CountedConstants[Arg2]++; + size_t Counter = CSI.ResCounters[Res]++; + + if (Options.Verbosity >= 2 && + (Counter & (Counter - 1)) == 0 && + CSI.ResCounters[!Res] == 0) + std::cerr << "DFSAN:" + << " PC " << std::hex << PC << std::dec + << " S " << CmpSize + << " T " << CmpType + << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Res + << " L" << L1 << GetLabelRange(L1) + << " L" << L2 << GetLabelRange(L2) + << " LR " << CSI.LR + << "\n"; +} + +bool DFSanState::Mutate(fuzzer::Unit *U) { + for (auto &PCToCmp : PcToCmpSiteInfoMap) { + auto &CSI = PCToCmp.second; + if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue; + if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue; + if (CSI.CountedConstants.size() != 1) continue; + uintptr_t C = CSI.CountedConstants.begin()->first; + if (U->size() >= CSI.CmpSize) { + size_t RangeSize = CSI.LR.End - CSI.LR.Beg; + size_t Idx = CSI.LR.Beg + rand() % RangeSize; + if (Idx + CSI.CmpSize > U->size()) continue; + C += rand() % 5 - 2; + memcpy(U->data() + Idx, &C, CSI.CmpSize); + return true; + } + } + return false; +} + +static DFSanState *DFSan; + +} // namespace + +namespace fuzzer { + +bool Fuzzer::MutateWithDFSan(Unit *U) { + if (!&dfsan_create_label || !DFSan) return false; + return DFSan->Mutate(U); +} + +void Fuzzer::InitializeDFSan() { + if (!&dfsan_create_label || !Options.UseDFSan) return; + DFSan = new DFSanState(Options); + CurrentUnit.resize(Options.MaxLen); + for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) { + dfsan_label L = dfsan_create_label("input", (void*)(i + 1)); + // We assume that no one else has called dfsan_create_label before. 
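+    // (Editorial note: userdata stores i + 1 rather than i so it is never 0,
+    // which lets LabelRange::Singleton map a label back to input byte i; the
+    // assert below checks that labels are issued sequentially from 1.)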
+ assert(L == i + 1); + dfsan_set_label(L, &CurrentUnit[i], 1); + } +} + +} // namespace fuzzer + +extern "C" { +void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, + uint64_t Arg2, dfsan_label L0, + dfsan_label L1, dfsan_label L2) { + assert(L0 == 0); + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); + uint64_t CmpSize = (SizeAndType >> 32) / 8; + uint64_t Type = (SizeAndType << 32) >> 32; + DFSan->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2); +} +} // extern "C" diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 9ccd744..05a699e 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -18,6 +18,10 @@ #include <thread> #include <atomic> #include <mutex> +#include <string> +#include <sstream> +#include <algorithm> +#include <iterator> namespace fuzzer { @@ -26,19 +30,26 @@ struct FlagDescription { const char *Name; const char *Description; int Default; - int *Flag; + int *IntFlag; + const char **StrFlag; }; struct { -#define FUZZER_FLAG(Type, Name, Default, Description) Type Name; +#define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_STRING(Name, Description) const char *Name; #include "FuzzerFlags.def" -#undef FUZZER_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_STRING } Flags; static FlagDescription FlagDescriptions [] { -#define FUZZER_FLAG(Type, Name, Default, Description) {#Name, Description, Default, &Flags.Name}, +#define FUZZER_FLAG_INT(Name, Default, Description) \ + { #Name, Description, Default, &Flags.Name, nullptr}, +#define FUZZER_FLAG_STRING(Name, Description) \ + { #Name, Description, 0, nullptr, &Flags.Name }, #include "FuzzerFlags.def" -#undef FUZZER_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_STRING }; static const size_t kNumFlags = @@ -79,11 +90,18 @@ static bool ParseOneFlag(const char *Param) { const char *Name = FlagDescriptions[F].Name; const char *Str = FlagValue(Param, Name); if (Str) { - int Val = std::stol(Str); - *FlagDescriptions[F].Flag = Val; - if (Flags.verbosity >= 2) - std::cerr << "Flag: " << Name << " " << Val << "\n"; - return true; + if (FlagDescriptions[F].IntFlag) { + int Val = std::stol(Str); + *FlagDescriptions[F].IntFlag = Val; + if (Flags.verbosity >= 2) + std::cerr << "Flag: " << Name << " " << Val << "\n"; + return true; + } else if (FlagDescriptions[F].StrFlag) { + *FlagDescriptions[F].StrFlag = Str; + if (Flags.verbosity >= 2) + std::cerr << "Flag: " << Name << " " << Str << "\n"; + return true; + } } } PrintHelp(); @@ -92,8 +110,12 @@ static bool ParseOneFlag(const char *Param) { // We don't use any library to minimize dependencies. static void ParseFlags(int argc, char **argv) { - for (size_t F = 0; F < kNumFlags; F++) - *FlagDescriptions[F].Flag = FlagDescriptions[F].Default; + for (size_t F = 0; F < kNumFlags; F++) { + if (FlagDescriptions[F].IntFlag) + *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].StrFlag) + *FlagDescriptions[F].StrFlag = nullptr; + } for (int A = 1; A < argc; A++) { if (ParseOneFlag(argv[A])) continue; inputs.push_back(argv[A]); @@ -139,6 +161,26 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, return HasErrors ? 
1 : 0; } +std::vector<std::string> ReadTokensFile(const char *TokensFilePath) { + if (!TokensFilePath) return {}; + std::string TokensFileContents = FileToString(TokensFilePath); + std::istringstream ISS(TokensFileContents); + std::vector<std::string> Res = {std::istream_iterator<std::string>{ISS}, + std::istream_iterator<std::string>{}}; + Res.push_back(" "); + Res.push_back("\t"); + Res.push_back("\n"); + return Res; +} + +int ApplyTokens(const Fuzzer &F, const char *InputFilePath) { + Unit U = FileToVector(InputFilePath); + auto T = F.SubstituteTokens(U); + T.push_back(0); + std::cout << T.data(); + return 0; +} + int FuzzerDriver(int argc, char **argv, UserCallback Callback) { using namespace fuzzer; @@ -161,8 +203,10 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { Options.UseCounters = Flags.use_counters; Options.UseFullCoverageSet = Flags.use_full_coverage_set; Options.UseCoveragePairs = Flags.use_coverage_pairs; + Options.UseDFSan = Flags.dfsan; Options.PreferSmallDuringInitialShuffle = Flags.prefer_small_during_initial_shuffle; + Options.Tokens = ReadTokensFile(Flags.tokens); if (Flags.runs >= 0) Options.MaxNumberOfRuns = Flags.runs; if (!inputs.empty()) @@ -181,6 +225,16 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { if (Flags.timeout > 0) SetTimer(Flags.timeout); + if (Flags.verbosity >= 2) { + std::cerr << "Tokens: {"; + for (auto &T : Options.Tokens) + std::cerr << T << ","; + std::cerr << "}\n"; + } + + if (Flags.apply_tokens) + return ApplyTokens(F, Flags.apply_tokens); + for (auto &inp : inputs) F.ReadDir(inp); diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 08176af..dbaf75d 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -6,41 +6,48 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// Flags. FUZZER_FLAG macro should be defined at the point of inclusion. -// We are not using any flag parsing library for better portability and -// independence. +// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the +// point of inclusion. We are not using any flag parsing library for better +// portability and independence. //===----------------------------------------------------------------------===// -FUZZER_FLAG(int, verbosity, 1, "Verbosity level.") -FUZZER_FLAG(int, seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG(int, iterations, -1, +FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") +FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.") +FUZZER_FLAG_INT(iterations, -1, "Number of iterations of the fuzzer internal loop" " (-1 for infinite iterations).") -FUZZER_FLAG(int, runs, -1, +FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG(int, max_len, 64, "Maximal length of the test input.") -FUZZER_FLAG(int, cross_over, 1, "If 1, cross over inputs.") -FUZZER_FLAG(int, mutate_depth, 5, +FUZZER_FLAG_INT(max_len, 64, "Maximal length of the test input.") +FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") +FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") -FUZZER_FLAG( - int, prefer_small_during_initial_shuffle, -1, +FUZZER_FLAG_INT( + prefer_small_during_initial_shuffle, -1, "If 1, always prefer smaller inputs during the initial corpus shuffle." " If 0, never do that. 
If -1, do it sometimes.") -FUZZER_FLAG(int, exit_on_first, 0, +FUZZER_FLAG_INT(exit_on_first, 0, "If 1, exit after the first new interesting input is found.") -FUZZER_FLAG(int, timeout, -1, "Timeout in seconds (if positive).") -FUZZER_FLAG(int, help, 0, "Print help.") -FUZZER_FLAG( - int, save_minimized_corpus, 0, +FUZZER_FLAG_INT(timeout, -1, "Timeout in seconds (if positive).") +FUZZER_FLAG_INT(help, 0, "Print help.") +FUZZER_FLAG_INT( + save_minimized_corpus, 0, "If 1, the minimized corpus is saved into the first input directory") -FUZZER_FLAG(int, use_counters, 0, "Use coverage counters") -FUZZER_FLAG(int, use_full_coverage_set, 0, +FUZZER_FLAG_INT(use_counters, 0, "Use coverage counters") +FUZZER_FLAG_INT(use_full_coverage_set, 0, "Experimental: Maximize the number of different full" " coverage sets as opposed to maximizing the total coverage." " This is potentially MUCH slower, but may discover more paths.") -FUZZER_FLAG(int, use_coverage_pairs, 0, +FUZZER_FLAG_INT(use_coverage_pairs, 0, "Experimental: Maximize the number of different coverage pairs.") -FUZZER_FLAG(int, jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" +FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") -FUZZER_FLAG(int, workers, 0, +FUZZER_FLAG_INT(workers, 0, "Number of simultaneous worker processes to run the jobs.") +FUZZER_FLAG_INT(dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless " + "the DFSan instrumentation was compiled in.") + +FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to" + " fuzz a token based input language.") +FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes " + " with tokens and write the result to stdout.") diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index 224808c..ef23d42 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -33,6 +33,12 @@ Unit FileToVector(const std::string &Path) { std::istreambuf_iterator<char>()); } +std::string FileToString(const std::string &Path) { + std::ifstream T(Path); + return std::string((std::istreambuf_iterator<char>(T)), + std::istreambuf_iterator<char>()); +} + void CopyFileToErr(const std::string &Path) { std::ifstream T(Path); std::copy(std::istreambuf_iterator<char>(T), std::istreambuf_iterator<char>(), diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index e4e5eb7..7787109 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -23,7 +23,8 @@ namespace fuzzer { typedef std::vector<uint8_t> Unit; using namespace std::chrono; -Unit ReadFile(const char *Path); +std::string FileToString(const std::string &Path); +Unit FileToVector(const std::string &Path); void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); @@ -51,17 +52,17 @@ class Fuzzer { bool UseCounters = false; bool UseFullCoverageSet = false; bool UseCoveragePairs = false; + bool UseDFSan = false; int PreferSmallDuringInitialShuffle = -1; size_t MaxNumberOfRuns = ULONG_MAX; std::string OutputCorpus; + std::vector<std::string> Tokens; }; - Fuzzer(UserCallback Callback, FuzzingOptions Options) - : Callback(Callback), Options(Options) { - SetDeathCallback(); - } + Fuzzer(UserCallback Callback, FuzzingOptions Options); void AddToCorpus(const Unit &U) { Corpus.push_back(U); } size_t Loop(size_t NumIterations); void 
ShuffleAndMinimize(); + void InitializeDFSan(); size_t CorpusSize() const { return Corpus.size(); } void ReadDir(const std::string &Path) { ReadDirToVectorOfUnits(Path.c_str(), &Corpus); @@ -76,20 +77,28 @@ class Fuzzer { size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } - static void AlarmCallback(); + static void StaticAlarmCallback(); + + Unit SubstituteTokens(const Unit &U) const; private: + void AlarmCallback(); + void ExecuteCallback(const Unit &U); size_t MutateAndTestOne(Unit *U); size_t RunOne(const Unit &U); size_t RunOneMaximizeTotalCoverage(const Unit &U); size_t RunOneMaximizeFullCoverageSet(const Unit &U); size_t RunOneMaximizeCoveragePairs(const Unit &U); void WriteToOutputCorpus(const Unit &U); - static void WriteToCrash(const Unit &U, const char *Prefix); + void WriteToCrash(const Unit &U, const char *Prefix); + bool MutateWithDFSan(Unit *U); + void PrintStats(const char *Where, size_t Cov, const char *End = "\n"); + void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = ""); void SetDeathCallback(); - static void DeathCallback(); - static Unit CurrentUnit; + static void StaticDeathCallback(); + void DeathCallback(); + Unit CurrentUnit; size_t TotalNumberOfRuns = 0; @@ -108,7 +117,8 @@ class Fuzzer { UserCallback Callback; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); - static system_clock::time_point UnitStartTime; + system_clock::time_point UnitStartTime; + long TimeOfLongestUnitInSeconds = 0; }; }; // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 563fbf4..9dfe30b 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -16,21 +16,49 @@ namespace fuzzer { -// static -Unit Fuzzer::CurrentUnit; -system_clock::time_point Fuzzer::UnitStartTime; +// Only one Fuzzer per process. +static Fuzzer *F; + +Fuzzer::Fuzzer(UserCallback Callback, FuzzingOptions Options) + : Callback(Callback), Options(Options) { + SetDeathCallback(); + InitializeDFSan(); + assert(!F); + F = this; +} void Fuzzer::SetDeathCallback() { - __sanitizer_set_death_callback(DeathCallback); + __sanitizer_set_death_callback(StaticDeathCallback); +} + +void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) { + if (Options.Tokens.empty()) { + PrintASCII(U, PrintAfter); + } else { + auto T = SubstituteTokens(U); + T.push_back(0); + std::cerr << T.data(); + std::cerr << PrintAfter; + } +} + +void Fuzzer::StaticDeathCallback() { + assert(F); + F->DeathCallback(); } void Fuzzer::DeathCallback() { std::cerr << "DEATH: " << std::endl; Print(CurrentUnit, "\n"); - PrintASCII(CurrentUnit, "\n"); + PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); WriteToCrash(CurrentUnit, "crash-"); } +void Fuzzer::StaticAlarmCallback() { + assert(F); + F->AlarmCallback(); +} + void Fuzzer::AlarmCallback() { size_t Seconds = duration_cast<seconds>(system_clock::now() - UnitStartTime).count(); @@ -38,27 +66,40 @@ void Fuzzer::AlarmCallback() { << std::endl; if (Seconds >= 3) { Print(CurrentUnit, "\n"); - PrintASCII(CurrentUnit, "\n"); + PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); WriteToCrash(CurrentUnit, "timeout-"); } exit(1); } +void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) { + if (!Options.Verbosity) return; + size_t Seconds = secondsSinceProcessStartUp(); + size_t ExecPerSec = (Seconds ? 
TotalNumberOfRuns / Seconds : 0); + std::cerr + << "#" << TotalNumberOfRuns + << "\t" << Where + << " cov " << Cov + << " bits " << TotalBits() + << " units " << Corpus.size() + << " exec/s " << ExecPerSec + << End; +} + void Fuzzer::ShuffleAndMinimize() { + size_t MaxCov = 0; bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 || (Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2)); if (Options.Verbosity) - std::cerr << "Shuffle: Size: " << Corpus.size() - << " prefer small: " << PreferSmall - << "\n"; + std::cerr << "PreferSmall: " << PreferSmall << "\n"; + PrintStats("READ ", 0); std::vector<Unit> NewCorpus; std::random_shuffle(Corpus.begin(), Corpus.end()); if (PreferSmall) std::stable_sort( Corpus.begin(), Corpus.end(), [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); - size_t MaxCov = 0; Unit &U = CurrentUnit; for (const auto &C : Corpus) { for (size_t First = 0; First < 1; First++) { @@ -77,18 +118,29 @@ void Fuzzer::ShuffleAndMinimize() { } } Corpus = NewCorpus; - if (Options.Verbosity) - std::cerr << "Shuffle done: " << Corpus.size() << " IC: " << MaxCov << "\n"; + PrintStats("INITED", MaxCov); } size_t Fuzzer::RunOne(const Unit &U) { UnitStartTime = system_clock::now(); TotalNumberOfRuns++; + size_t Res = 0; if (Options.UseFullCoverageSet) - return RunOneMaximizeFullCoverageSet(U); - if (Options.UseCoveragePairs) - return RunOneMaximizeCoveragePairs(U); - return RunOneMaximizeTotalCoverage(U); + Res = RunOneMaximizeFullCoverageSet(U); + else if (Options.UseCoveragePairs) + Res = RunOneMaximizeCoveragePairs(U); + else + Res = RunOneMaximizeTotalCoverage(U); + auto UnitStopTime = system_clock::now(); + auto TimeOfUnit = + duration_cast<seconds>(UnitStopTime - UnitStartTime).count(); + if (TimeOfUnit > TimeOfLongestUnitInSeconds) { + TimeOfLongestUnitInSeconds = TimeOfUnit; + std::cerr << "Longest unit: " << TimeOfLongestUnitInSeconds + << " s:\n"; + Print(U, "\n"); + } + return Res; } static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { @@ -99,12 +151,35 @@ static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { return Res; } +Unit Fuzzer::SubstituteTokens(const Unit &U) const { + Unit Res; + for (auto Idx : U) { + if (Idx < Options.Tokens.size()) { + std::string Token = Options.Tokens[Idx]; + Res.insert(Res.end(), Token.begin(), Token.end()); + } else { + Res.push_back(' '); + } + } + // FIXME: Apply DFSan labels. + return Res; +} + +void Fuzzer::ExecuteCallback(const Unit &U) { + if (Options.Tokens.empty()) { + Callback(U.data(), U.size()); + } else { + auto T = SubstituteTokens(U); + Callback(T.data(), T.size()); + } +} + // Experimental. Does not yet scale. // Fully reset the current coverage state, run a single unit, // collect all coverage pairs and return non-zero if a new pair is observed. size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) { __sanitizer_reset_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); uintptr_t *PCs; uintptr_t NumPCs = __sanitizer_get_coverage_guards(&PCs); bool HasNewPairs = false; @@ -129,7 +204,7 @@ size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) { // e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale. 
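// (Editorial worked example for SubstituteTokens/ExecuteCallback above, not
// part of the patch: with Options.Tokens = {"if", "else", "while"}, the unit
// {0, 2, 1, 255} expands to "ifwhileelse " -- each byte indexes the token
// table and out-of-range bytes, here 255, decay to a single space -- so the
// target callback always sees the expanded text, never the raw index bytes.)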
size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) { __sanitizer_reset_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); uintptr_t *PCs; uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs); if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second) @@ -144,21 +219,16 @@ size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) { __sanitizer_update_counter_bitset_and_clear_counters(0); } size_t OldCoverage = __sanitizer_get_total_unique_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); size_t NewCoverage = __sanitizer_get_total_unique_coverage(); size_t NumNewBits = 0; if (Options.UseCounters) NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters( CounterBitmap.data()); - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) { - size_t Seconds = secondsSinceProcessStartUp(); - std::cerr - << "#" << TotalNumberOfRuns - << "\tcov: " << NewCoverage - << "\tbits: " << TotalBits() - << "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n"; - } + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) + PrintStats("pulse ", NewCoverage); + if (NewCoverage > OldCoverage || NumNewBits) return NewCoverage; return 0; @@ -192,22 +262,18 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) { for (int i = 0; i < Options.MutateDepth; i++) { if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) return NewUnits; + MutateWithDFSan(U); Mutate(U, Options.MaxLen); size_t NewCoverage = RunOne(*U); if (NewCoverage) { Corpus.push_back(*U); NewUnits++; + PrintStats("NEW ", NewCoverage, ""); if (Options.Verbosity) { - std::cerr << "#" << TotalNumberOfRuns - << "\tNEW: " << NewCoverage - << " B: " << TotalBits() - << " L: " << U->size() - << " S: " << Corpus.size() - << " I: " << i - << "\t"; + std::cerr << " L: " << U->size(); if (U->size() < 30) { - PrintASCII(*U); - std::cerr << "\t"; + std::cerr << " "; + PrintUnitInASCIIOrTokens(*U, "\t"); Print(*U); } std::cerr << "\n"; diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index 679f289..3635f39 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -19,15 +19,18 @@ namespace fuzzer { void Print(const Unit &v, const char *PrintAfter) { - std::cerr << v.size() << ": "; for (auto x : v) - std::cerr << (unsigned) x << " "; + std::cerr << "0x" << std::hex << (unsigned) x << std::dec << ","; std::cerr << PrintAfter; } void PrintASCII(const Unit &U, const char *PrintAfter) { - for (auto X : U) - std::cerr << (char)((isascii(X) && X >= ' ') ? X : '?'); + for (auto X : U) { + if (isprint(X)) + std::cerr << X; + else + std::cerr << "\\x" << std::hex << (int)(unsigned)X << std::dec; + } std::cerr << PrintAfter; } @@ -43,7 +46,7 @@ std::string Hash(const Unit &in) { } static void AlarmHandler(int, siginfo_t *, void *) { - Fuzzer::AlarmCallback(); + Fuzzer::StaticAlarmCallback(); } void SetTimer(int Seconds) { diff --git a/lib/Fuzzer/README.txt b/lib/Fuzzer/README.txt index e4d6b4f..79f49b5 100644 --- a/lib/Fuzzer/README.txt +++ b/lib/Fuzzer/README.txt @@ -1,112 +1,2 @@ -=============================== -Fuzzer -- a library for coverage-guided fuzz testing. -=============================== +Move to http://llvm.org/docs/LibFuzzer.html -This library is intended primarily for in-process coverage-guided fuzz testing -(fuzzing) of other libraries. The typical workflow looks like this: - - * Build the Fuzzer library as a static archive (or just a set of .o files). - Note that the Fuzzer contains the main() function. 
- Preferably do *not* use sanitizers while building the Fuzzer. - * Build the library you are going to test with -fsanitize-coverage=[234] - and one of the sanitizers. We recommend to build the library in several - different modes (e.g. asan, msan, lsan, ubsan, etc) and even using different - optimizations options (e.g. -O0, -O1, -O2) to diversify testing. - * Build a test driver using the same options as the library. - The test driver is a C/C++ file containing interesting calls to the library - inside a single function: - extern "C" void TestOneInput(const uint8_t *Data, size_t Size); - * Link the Fuzzer, the library and the driver together into an executable - using the same sanitizer options as for the library. - * Collect the initial corpus of inputs for the - fuzzer (a directory with test inputs, one file per input). - The better your inputs are the faster you will find something interesting. - Also try to keep your inputs small, otherwise the Fuzzer will run too slow. - * Run the fuzzer with the test corpus. As new interesting test cases are - discovered they will be added to the corpus. If a bug is discovered by - the sanitizer (asan, etc) it will be reported as usual and the reproducer - will be written to disk. - Each Fuzzer process is single-threaded (unless the library starts its own - threads). You can run the Fuzzer on the same corpus in multiple processes. - in parallel. For run-time options run the Fuzzer binary with '-help=1'. - - -The Fuzzer is similar in concept to AFL (http://lcamtuf.coredump.cx/afl/), -but uses in-process Fuzzing, which is more fragile, more restrictive, but -potentially much faster as it has no overhead for process start-up. -It uses LLVM's "Sanitizer Coverage" instrumentation to get in-process -coverage-feedback https://code.google.com/p/address-sanitizer/wiki/AsanCoverage - -The code resides in the LLVM repository and is (or will be) used by various -parts of LLVM, but the Fuzzer itself does not (and should not) depend on any -part of LLVM and can be used for other projects. Ideally, the Fuzzer's code -should not have any external dependencies. Right now it uses STL, which may need -to be fixed later. See also F.A.Q. below. - -Examples of usage in LLVM: - * clang-format-fuzzer. The inputs are random pieces of C++-like text. - * Build (make sure to use fresh clang as the host compiler): - cmake -GNinja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=YES \ - /path/to/llvm -DCMAKE_BUILD_TYPE=Release - ninja clang-format-fuzzer - * Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc) - * TODO: commit the pre-fuzzed corpus to svn (?). - * Run: - clang-format-fuzzer CORPUS_DIR - -Toy example (see SimpleTest.cpp): -a simple function that does something interesting if it receives bytes "Hi!". - # Build the Fuzzer with asan: - % clang++ -std=c++11 -fsanitize=address -fsanitize-coverage=3 -O1 -g \ - Fuzzer*.cpp test/SimpleTest.cpp - # Run the fuzzer with no corpus (assuming on empty input) - % ./a.out - -=============================================================================== -F.A.Q. - -Q. Why Fuzzer does not use any of the LLVM support? -A. There are two reasons. -First, we want this library to be used outside of the LLVM w/o users having to -build the rest of LLVM. This may sound unconvincing for many LLVM folks, -but in practice the need for building the whole LLVM frightens many potential -users -- and we want more users to use this code. 
-Second, there is a subtle technical reason not to rely on the rest of LLVM, or -any other large body of code (maybe not even STL). When coverage instrumentation -is enabled, it will also instrument the LLVM support code which will blow up the -coverage set of the process (since the fuzzer is in-process). In other words, by -using more external dependencies we will slow down the fuzzer while the main -reason for it to exist is extreme speed. - -Q. What about Windows then? The Fuzzer contains code that does not build on -Windows. -A. The sanitizer coverage support does not work on Windows either as of 01/2015. -Once it's there, we'll need to re-implement OS-specific parts (I/O, signals). - -Q. When this Fuzzer is not a good solution for a problem? -A. - * If the test inputs are validated by the target library and the validator - asserts/crashes on invalid inputs, the in-process fuzzer is not applicable - (we could use fork() w/o exec, but it comes with extra overhead). - * Bugs in the target library may accumulate w/o being detected. E.g. a memory - corruption that goes undetected at first and then leads to a crash while - testing another input. This is why it is highly recommended to run this - in-process fuzzer with all sanitizers to detect most bugs on the spot. - * It is harder to protect the in-process fuzzer from excessive memory - consumption and infinite loops in the target library (still possible). - * The target library should not have significant global state that is not - reset between the runs. - * Many interesting target libs are not designed in a way that supports - the in-process fuzzer interface (e.g. require a file path instead of a - byte array). - * If a single test run takes a considerable fraction of a second (or - more) the speed benefit from the in-process fuzzer is negligible. - * If the target library runs persistent threads (that outlive - execution of one test) the fuzzing results will be unreliable. - -Q. So, what exactly this Fuzzer is good for? -A. This Fuzzer might be a good choice for testing libraries that have relatively -small inputs, each input takes < 1ms to run, and the library code is not expected -to crash on invalid inputs. -Examples: regular expression matchers, text or binary format parsers. diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt new file mode 100644 index 0000000..f3c4f80 --- /dev/null +++ b/lib/Fuzzer/cxx_fuzzer_tokens.txt @@ -0,0 +1,218 @@ +# +## +` +~ +! +@ +$ +% +^ +& +* +( +) +_ +- +_ += ++ +{ +} +[ +] +| +\ +, +. +/ +? 
+> +< +; +: +' +" +++ +-- +<< +>> ++= +-= +*= +/= +>>= +<<= +&= +|= +^= +%= +!= +&& +|| +== +>= +<= +-> +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +alignas +alignof +and +and_eq +asm +auto +bitand +bitor +bool +break +case +catch +char +char16_t +char32_t +class +compl +concept +const +constexpr +const_cast +continue +decltype +default +delete +do +double +dynamic_cast +else +enum +explicit +export +extern +false +float +for +friend +goto +if +inline +int +long +mutable +namespace +new +noexcept +not +not_eq +nullptr +operator +or +or_eq +private +protected +public +register +reinterpret_cast +requires +return +short +signed +sizeof +static +static_assert +static_cast +struct +switch +template +this +thread_local +throw +true +try +typedef +typeid +typename +union +unsigned +using +virtual +void +volatile +wchar_t +while +xor +xor_eq +if +elif +else +endif +defined +ifdef +ifndef +define +undef +include +line +error +pragma +override +final diff --git a/lib/Fuzzer/dfsan_fuzzer_abi.list b/lib/Fuzzer/dfsan_fuzzer_abi.list new file mode 100644 index 0000000..7da7522 --- /dev/null +++ b/lib/Fuzzer/dfsan_fuzzer_abi.list @@ -0,0 +1,12 @@ +# Replaces __sanitizer_cov_trace_cmp with __dfsw___sanitizer_cov_trace_cmp +fun:__sanitizer_cov_trace_cmp=custom +fun:__sanitizer_cov_trace_cmp=uninstrumented + +# Ignores coverage callbacks. +fun:__sanitizer_cov=uninstrumented +fun:__sanitizer_cov=discard +fun:__sanitizer_cov_module_init=uninstrumented +fun:__sanitizer_cov_module_init=discard + +# Don't add extra parameters to the Fuzzer callback. +fun:TestOneInput=uninstrumented diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index 08130c6..fb3bf20 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -2,10 +2,11 @@ # basic blocks and we'll fail to discover the targets. # Also enable the coverage instrumentation back (it is disabled # for the Fuzzer lib) -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4") +set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=4") set(Tests CounterTest + CxxTokensTest FourIndependentBranchesTest FullCoverageSetTest InfiniteTest @@ -14,11 +15,14 @@ set(Tests TimeoutTest ) +set(DFSanTests + DFSanSimpleCmpTest + ) + set(TestBinaries) foreach(Test ${Tests}) add_executable(LLVMFuzzer-${Test} - EXCLUDE_FROM_ALL ${Test}.cpp ) target_link_libraries(LLVMFuzzer-${Test} @@ -52,6 +56,13 @@ target_link_libraries(LLVMFuzzer-Unittest set(TestBinaries ${TestBinaries} LLVMFuzzer-Unittest) +add_subdirectory(dfsan) + +foreach(Test ${DFSanTests}) + set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}) +endforeach() + + set_target_properties(${TestBinaries} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp new file mode 100644 index 0000000..1addccb --- /dev/null +++ b/lib/Fuzzer/test/CxxTokensTest.cpp @@ -0,0 +1,24 @@ +// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens. 
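+// (Editorial note: fuzzer.test runs this binary with
+// -tokens=%S/../cxx_fuzzer_tokens.txt, so each input byte is an index into
+// that token table, with " ", "\t" and "\n" appended as trailing tokens by
+// ReadTokensFile. The 24-character target below is then reachable as the
+// six-token sequence [thread_local][ ][unsigned][ ][A][;], i.e. six
+// well-chosen input bytes.)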
+#include <cstdint> +#include <cstdlib> +#include <cstddef> +#include <cstring> +#include <iostream> + +static void Found() { + std::cout << "Found the target, exiting\n"; + exit(1); +} + +extern "C" void TestOneInput(const uint8_t *Data, size_t Size) { + // looking for "thread_local unsigned A;" + if (Size < 24) return; + if (0 == memcmp(&Data[0], "thread_local", 12)) + if (Data[12] == ' ') + if (0 == memcmp(&Data[13], "unsigned", 8)) + if (Data[21] == ' ') + if (Data[22] == 'A') + if (Data[23] == ';') + Found(); +} + diff --git a/lib/Fuzzer/test/dfsan/CMakeLists.txt b/lib/Fuzzer/test/dfsan/CMakeLists.txt new file mode 100644 index 0000000..b5b874f --- /dev/null +++ b/lib/Fuzzer/test/dfsan/CMakeLists.txt @@ -0,0 +1,17 @@ +# These tests depend on both coverage and dfsan instrumentation. + +set(DFSAN_FUZZER_ABI_LIST "${CMAKE_CURRENT_SOURCE_DIR}/../../dfsan_fuzzer_abi.list") + +set(CMAKE_CXX_FLAGS_RELEASE + "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all -fsanitize=dataflow -mllvm -sanitizer-coverage-experimental-trace-compares=1 -fsanitize-blacklist=${DFSAN_FUZZER_ABI_LIST}") + +foreach(Test ${DFSanTests}) + set_source_files_properties(${Test}.cpp PROPERTIES OBJECT_DEPENDS ${DFSAN_FUZZER_ABI_LIST}) + add_executable(LLVMFuzzer-${Test} + ${Test}.cpp + ) + target_link_libraries(LLVMFuzzer-${Test} + LLVMFuzzer + ) +endforeach() + diff --git a/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp new file mode 100644 index 0000000..1162092 --- /dev/null +++ b/lib/Fuzzer/test/dfsan/DFSanSimpleCmpTest.cpp @@ -0,0 +1,30 @@ +// Simple test for a fuzzer. The fuzzer must find several narrow ranges. +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <cstdio> + +extern "C" void TestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 14) return; + uint64_t x = 0; + int64_t y = 0; + int z = 0; + unsigned short a = 0; + memcpy(&x, Data, 8); + memcpy(&y, Data + Size - 8, 8); + memcpy(&z, Data + Size / 2, sizeof(z)); + memcpy(&a, Data + Size / 2 + 4, sizeof(a)); + + if (x > 1234567890 && + x < 1234567895 && + y >= 987654321 && + y <= 987654325 && + z < -10000 && + z >= -10005 && + z != -10003 && + a == 4242) { + fprintf(stderr, "Found the target: size %zd (%zd, %zd, %d, %d), exiting.\n", + Size, x, y, z, a); + exit(1); + } +} diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test index 45691f5..2a0e95f 100644 --- a/lib/Fuzzer/test/fuzzer.test +++ b/lib/Fuzzer/test/fuzzer.test @@ -20,3 +20,9 @@ FourIndependentBranchesTest: BINGO RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest CounterTest: BINGO + +RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=DFSanSimpleCmpTest +DFSanSimpleCmpTest: Found the target: + +RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s --check-prefix=CxxTokensTest +CxxTokensTest: Found the target, exiting diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk index 2ca02f7..32ebfae 100644 --- a/lib/IR/Android.mk +++ b/lib/IR/Android.mk @@ -43,7 +43,6 @@ vmcore_SRC_FILES := \ Type.cpp \ TypeFinder.cpp \ Use.cpp \ - UseListOrder.cpp \ User.cpp \ Value.cpp \ ValueSymbolTable.cpp \ diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index ae0beba..48737b5 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -401,9 +401,7 @@ public: /// NumberedTypes - The numbered types, along with their value. 
DenseMap<StructType*, unsigned> NumberedTypes; - - TypePrinting() {} - ~TypePrinting() {} + TypePrinting() = default; void incorporateTypes(const Module &M); @@ -1443,11 +1441,11 @@ void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) { Out << FS << Name << ": "; SmallVector<unsigned, 8> SplitFlags; - unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags); + unsigned Extra = DebugNode::splitFlags(Flags, SplitFlags); FieldSeparator FlagsFS(" | "); for (unsigned F : SplitFlags) { - const char *StringF = DIDescriptor::getFlagString(F); + const char *StringF = DebugNode::getFlagString(F); assert(StringF && "Expected valid flag"); Out << FlagsFS << StringF; } @@ -1505,7 +1503,7 @@ static void writeMDSubrange(raw_ostream &Out, const MDSubrange *N, Out << "!MDSubrange("; MDFieldPrinter Printer(Out); Printer.printInt("count", N->getCount(), /* ShouldSkipZero */ false); - Printer.printInt("lowerBound", N->getLo()); + Printer.printInt("lowerBound", N->getLowerBound()); Out << ")"; } @@ -1539,16 +1537,16 @@ static void writeMDDerivedType(raw_ostream &Out, const MDDerivedType *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope()); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("baseType", N->getBaseType(), + Printer.printMetadata("baseType", N->getRawBaseType(), /* ShouldSkipNull */ false); Printer.printInt("size", N->getSizeInBits()); Printer.printInt("align", N->getAlignInBits()); Printer.printInt("offset", N->getOffsetInBits()); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("extraData", N->getExtraData()); + Printer.printMetadata("extraData", N->getRawExtraData()); Out << ")"; } @@ -1559,19 +1557,19 @@ static void writeMDCompositeType(raw_ostream &Out, const MDCompositeType *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope()); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("baseType", N->getBaseType()); + Printer.printMetadata("baseType", N->getRawBaseType()); Printer.printInt("size", N->getSizeInBits()); Printer.printInt("align", N->getAlignInBits()); Printer.printInt("offset", N->getOffsetInBits()); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("elements", N->getElements()); + Printer.printMetadata("elements", N->getRawElements()); Printer.printDwarfEnum("runtimeLang", N->getRuntimeLang(), dwarf::LanguageString); - Printer.printMetadata("vtableHolder", N->getVTableHolder()); - Printer.printMetadata("templateParams", N->getTemplateParams()); + Printer.printMetadata("vtableHolder", N->getRawVTableHolder()); + Printer.printMetadata("templateParams", N->getRawTemplateParams()); Printer.printString("identifier", N->getIdentifier()); Out << ")"; } @@ -1582,7 +1580,8 @@ static void writeMDSubroutineType(raw_ostream &Out, const MDSubroutineType *N, Out << "!MDSubroutineType("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("types", N->getTypeArray(), /* ShouldSkipNull */ false); + 
Printer.printMetadata("types", N->getRawTypeArray(), + /* ShouldSkipNull */ false); Out << ")"; } @@ -1604,7 +1603,7 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printDwarfEnum("language", N->getSourceLanguage(), dwarf::LanguageString, /* ShouldSkipZero */ false); - Printer.printMetadata("file", N->getFile(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false); Printer.printString("producer", N->getProducer()); Printer.printBool("isOptimized", N->isOptimized()); Printer.printString("flags", N->getFlags()); @@ -1613,11 +1612,11 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N, Printer.printString("splitDebugFilename", N->getSplitDebugFilename()); Printer.printInt("emissionKind", N->getEmissionKind(), /* ShouldSkipZero */ false); - Printer.printMetadata("enums", N->getEnumTypes()); - Printer.printMetadata("retainedTypes", N->getRetainedTypes()); - Printer.printMetadata("subprograms", N->getSubprograms()); - Printer.printMetadata("globals", N->getGlobalVariables()); - Printer.printMetadata("imports", N->getImportedEntities()); + Printer.printMetadata("enums", N->getRawEnumTypes()); + Printer.printMetadata("retainedTypes", N->getRawRetainedTypes()); + Printer.printMetadata("subprograms", N->getRawSubprograms()); + Printer.printMetadata("globals", N->getRawGlobalVariables()); + Printer.printMetadata("imports", N->getRawImportedEntities()); Out << ")"; } @@ -1628,23 +1627,23 @@ static void writeMDSubprogram(raw_ostream &Out, const MDSubprogram *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); Printer.printString("linkageName", N->getLinkageName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printBool("isLocal", N->isLocalToUnit()); Printer.printBool("isDefinition", N->isDefinition()); Printer.printInt("scopeLine", N->getScopeLine()); - Printer.printMetadata("containingType", N->getContainingType()); + Printer.printMetadata("containingType", N->getRawContainingType()); Printer.printDwarfEnum("virtuality", N->getVirtuality(), dwarf::VirtualityString); Printer.printInt("virtualIndex", N->getVirtualIndex()); Printer.printDIFlags("flags", N->getFlags()); Printer.printBool("isOptimized", N->isOptimized()); - Printer.printMetadata("function", N->getFunction()); - Printer.printMetadata("templateParams", N->getTemplateParams()); - Printer.printMetadata("declaration", N->getDeclaration()); - Printer.printMetadata("variables", N->getVariables()); + Printer.printMetadata("function", N->getRawFunction()); + Printer.printMetadata("templateParams", N->getRawTemplateParams()); + Printer.printMetadata("declaration", N->getRawDeclaration()); + Printer.printMetadata("variables", N->getRawVariables()); Out << ")"; } @@ -1653,8 +1652,8 @@ static void writeMDLexicalBlock(raw_ostream &Out, const MDLexicalBlock *N, const Module *Context) { Out << "!MDLexicalBlock("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", 
N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Printer.printInt("column", N->getColumn()); Out << ")"; @@ -1667,8 +1666,8 @@ static void writeMDLexicalBlockFile(raw_ostream &Out, const Module *Context) { Out << "!MDLexicalBlockFile("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("discriminator", N->getDiscriminator(), /* ShouldSkipZero */ false); Out << ")"; @@ -1680,8 +1679,8 @@ static void writeMDNamespace(raw_ostream &Out, const MDNamespace *N, Out << "!MDNamespace("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Out << ")"; } @@ -1694,7 +1693,7 @@ static void writeMDTemplateTypeParameter(raw_ostream &Out, Out << "!MDTemplateTypeParameter("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); - Printer.printMetadata("type", N->getType(), /* ShouldSkipNull */ false); + Printer.printMetadata("type", N->getRawType(), /* ShouldSkipNull */ false); Out << ")"; } @@ -1708,7 +1707,7 @@ static void writeMDTemplateValueParameter(raw_ostream &Out, if (N->getTag() != dwarf::DW_TAG_template_value_parameter) Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printMetadata("value", N->getValue(), /* ShouldSkipNull */ false); Out << ")"; } @@ -1720,14 +1719,14 @@ static void writeMDGlobalVariable(raw_ostream &Out, const MDGlobalVariable *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); Printer.printString("linkageName", N->getLinkageName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printBool("isLocal", N->isLocalToUnit()); Printer.printBool("isDefinition", N->isDefinition()); - Printer.printMetadata("variable", N->getVariable()); - Printer.printMetadata("declaration", N->getStaticDataMemberDeclaration()); + Printer.printMetadata("variable", N->getRawVariable()); + Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration()); Out << ")"; } @@ -1741,12 +1740,11 @@ static void writeMDLocalVariable(raw_ostream &Out, const MDLocalVariable *N, Printer.printInt("arg", N->getArg(), /* ShouldSkipZero */ N->getTag() == dwarf::DW_TAG_auto_variable); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + 
Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Printer.printDIFlags("flags", N->getFlags()); - Printer.printMetadata("inlinedAt", N->getInlinedAt()); Out << ")"; } @@ -1777,12 +1775,12 @@ static void writeMDObjCProperty(raw_ostream &Out, const MDObjCProperty *N, Out << "!MDObjCProperty("; MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); - Printer.printMetadata("file", N->getFile()); + Printer.printMetadata("file", N->getRawFile()); Printer.printInt("line", N->getLine()); Printer.printString("setter", N->getSetterName()); Printer.printString("getter", N->getGetterName()); Printer.printInt("attributes", N->getAttributes()); - Printer.printMetadata("type", N->getType()); + Printer.printMetadata("type", N->getRawType()); Out << ")"; } @@ -1793,8 +1791,8 @@ static void writeMDImportedEntity(raw_ostream &Out, const MDImportedEntity *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printTag(N); Printer.printString("name", N->getName()); - Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("entity", N->getEntity()); + Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); + Printer.printMetadata("entity", N->getRawEntity()); Printer.printInt("line", N->getLine()); Out << ")"; } @@ -1943,16 +1941,19 @@ class AssemblyWriter { TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; SetVector<const Comdat *> Comdats; + bool ShouldPreserveUseListOrder; UseListOrderStack UseListOrders; public: /// Construct an AssemblyWriter with an external SlotTracker - AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, - const Module *M, AssemblyAnnotationWriter *AAW); + AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M, + AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder = false); /// Construct an AssemblyWriter with an internally allocated SlotTracker AssemblyWriter(formatted_raw_ostream &o, const Module *M, - AssemblyAnnotationWriter *AAW); + AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder = false); void printMDNodeBody(const MDNode *MD); void printNamedMDNode(const NamedMDNode *NMD); @@ -2004,18 +2005,20 @@ void AssemblyWriter::init() { Comdats.insert(C); } - AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, - const Module *M, - AssemblyAnnotationWriter *AAW) - : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW) { + const Module *M, AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder) + : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { init(); } AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M, - AssemblyAnnotationWriter *AAW) - : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)), - Machine(*ModuleSlotTracker), AnnotationWriter(AAW) { + AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder) + : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)), + Machine(*ModuleSlotTracker), AnnotationWriter(AAW), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { init(); } @@ -2103,7 +2106,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand, void AssemblyWriter::printModule(const Module *M) { Machine.initialize(); - if (shouldPreserveAssemblyUseListOrder()) + if 
(ShouldPreserveUseListOrder) UseListOrders = predictUseListOrder(M); if (!M->getModuleIdentifier().empty() && @@ -2778,8 +2781,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } Operand = CI->getCalledValue(); - PointerType *PTy = cast<PointerType>(Operand->getType()); - FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + FunctionType *FTy = cast<FunctionType>(CI->getFunctionType()); Type *RetTy = FTy->getReturnType(); const AttributeSet &PAL = CI->getAttributes(); @@ -2791,15 +2793,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // and if the return type is not a pointer to a function. // Out << ' '; - if (!FTy->isVarArg() && - (!RetTy->isPointerTy() || - !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) { - TypePrinter.print(RetTy, Out); - Out << ' '; - writeOperand(Operand, false); - } else { - writeOperand(Operand, true); - } + TypePrinter.print(FTy->isVarArg() ? FTy : RetTy, Out); + Out << ' '; + writeOperand(Operand, false); Out << '('; for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) { if (op > 0) @@ -3060,10 +3056,18 @@ void AssemblyWriter::printUseLists(const Function *F) { // External Interface declarations //===----------------------------------------------------------------------===// -void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { +void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { + SlotTracker SlotTable(this->getParent()); + formatted_raw_ostream OS(ROS); + AssemblyWriter W(OS, SlotTable, this->getParent(), AAW); + W.printFunction(this); +} + +void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder) const { SlotTracker SlotTable(this); formatted_raw_ostream OS(ROS); - AssemblyWriter W(OS, SlotTable, this, AAW); + AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder); W.printModule(this); } diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 199c318..d544689 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -115,10 +115,10 @@ class IntAttributeImpl : public EnumAttributeImpl { public: IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val) : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) { - assert( - (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment || - Kind == Attribute::Dereferenceable) && - "Wrong kind for int attribute!"); + assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment || + Kind == Attribute::Dereferenceable || + Kind == Attribute::DereferenceableOrNull) && + "Wrong kind for int attribute!"); } uint64_t getValue() const { return Val; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index daac6b5..be5b74f 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -94,6 +94,12 @@ Attribute Attribute::getWithDereferenceableBytes(LLVMContext &Context, return get(Context, Dereferenceable, Bytes); } +Attribute Attribute::getWithDereferenceableOrNullBytes(LLVMContext &Context, + uint64_t Bytes) { + assert(Bytes && "Bytes must be non-zero."); + return get(Context, DereferenceableOrNull, Bytes); +} + //===----------------------------------------------------------------------===// // Attribute Accessor Methods //===----------------------------------------------------------------------===// @@ -170,6 +176,13 @@ uint64_t Attribute::getDereferenceableBytes() const { return pImpl->getValueAsInt(); } +uint64_t Attribute::getDereferenceableOrNullBytes() const { + 
assert(hasAttribute(Attribute::DereferenceableOrNull) && + "Trying to get dereferenceable bytes from " + "non-dereferenceable attribute!"); + return pImpl->getValueAsInt(); +} + std::string Attribute::getAsString(bool InAttrGrp) const { if (!pImpl) return ""; @@ -263,9 +276,9 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return Result; } - if (hasAttribute(Attribute::StackAlignment)) { + auto AttrWithBytesToString = [&](const char *Name) { std::string Result; - Result += "alignstack"; + Result += Name; if (InAttrGrp) { Result += "="; Result += utostr(getValueAsInt()); @@ -275,21 +288,16 @@ std::string Attribute::getAsString(bool InAttrGrp) const { Result += ")"; } return Result; - } + }; - if (hasAttribute(Attribute::Dereferenceable)) { - std::string Result; - Result += "dereferenceable"; - if (InAttrGrp) { - Result += "="; - Result += utostr(getValueAsInt()); - } else { - Result += "("; - Result += utostr(getValueAsInt()); - Result += ")"; - } - return Result; - } + if (hasAttribute(Attribute::StackAlignment)) + return AttrWithBytesToString("alignstack"); + + if (hasAttribute(Attribute::Dereferenceable)) + return AttrWithBytesToString("dereferenceable"); + + if (hasAttribute(Attribute::DereferenceableOrNull)) + return AttrWithBytesToString("dereferenceable_or_null"); // Convert target-dependent attributes to strings of the form: // @@ -298,12 +306,12 @@ std::string Attribute::getAsString(bool InAttrGrp) const { // if (isStringAttribute()) { std::string Result; - Result += '\"' + getKindAsString().str() + '"'; + Result += (Twine('"') + getKindAsString() + Twine('"')).str(); StringRef Val = pImpl->getValueAsString(); if (Val.empty()) return Result; - Result += "=\"" + Val.str() + '"'; + Result += ("=\"" + Val + Twine('"')).str(); return Result; } @@ -428,6 +436,11 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::JumpTable: return 1ULL << 45; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); + break; + case Attribute::DereferenceableOrNull: + llvm_unreachable("dereferenceable_or_null attribute not supported in raw " + "format"); + break; } llvm_unreachable("Unsupported attribute type"); } @@ -663,6 +676,10 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, Attrs.push_back(std::make_pair(Index, Attribute::getWithDereferenceableBytes(C, B.getDereferenceableBytes()))); + else if (Kind == Attribute::DereferenceableOrNull) + Attrs.push_back( + std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes( + C, B.getDereferenceableOrNullBytes()))); else Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind))); } @@ -842,6 +859,14 @@ AttributeSet AttributeSet::addDereferenceableAttr(LLVMContext &C, unsigned Index return addAttributes(C, Index, AttributeSet::get(C, Index, B)); } +AttributeSet AttributeSet::addDereferenceableOrNullAttr(LLVMContext &C, + unsigned Index, + uint64_t Bytes) const { + llvm::AttrBuilder B; + B.addDereferenceableOrNullAttr(Bytes); + return addAttributes(C, Index, AttributeSet::get(C, Index, B)); +} + //===----------------------------------------------------------------------===// // AttributeSet Accessor Methods //===----------------------------------------------------------------------===// @@ -1011,7 +1036,8 @@ void AttributeSet::dump() const { //===----------------------------------------------------------------------===// AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) - : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0) 
{ + : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0), + DerefOrNullBytes(0) { AttributeSetImpl *pImpl = AS.pImpl; if (!pImpl) return; @@ -1028,7 +1054,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) void AttrBuilder::clear() { Attrs.reset(); - Alignment = StackAlignment = DerefBytes = 0; + Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0; } AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) { @@ -1055,6 +1081,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) { StackAlignment = Attr.getStackAlignment(); else if (Kind == Attribute::Dereferenceable) DerefBytes = Attr.getDereferenceableBytes(); + else if (Kind == Attribute::DereferenceableOrNull) + DerefOrNullBytes = Attr.getDereferenceableOrNullBytes(); return *this; } @@ -1073,6 +1101,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { StackAlignment = 0; else if (Val == Attribute::Dereferenceable) DerefBytes = 0; + else if (Val == Attribute::DereferenceableOrNull) + DerefOrNullBytes = 0; return *this; } @@ -1099,6 +1129,8 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) { StackAlignment = 0; else if (Kind == Attribute::Dereferenceable) DerefBytes = 0; + else if (Kind == Attribute::DereferenceableOrNull) + DerefOrNullBytes = 0; } else { assert(Attr.isStringAttribute() && "Invalid attribute type!"); std::map<std::string, std::string>::iterator @@ -1149,6 +1181,15 @@ AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) { return *this; } +AttrBuilder &AttrBuilder::addDereferenceableOrNullAttr(uint64_t Bytes) { + if (Bytes == 0) + return *this; + + Attrs[Attribute::DereferenceableOrNull] = true; + DerefOrNullBytes = Bytes; + return *this; +} + AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) { // FIXME: What if both have alignments, but they don't match?! 
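The new attribute reuses the dereferenceable plumbing end to end: AttrBuilder carries the byte count in DerefOrNullBytes, and AttributeSet::addDereferenceableOrNullAttr materializes it. A minimal round-trip sketch using only entry points added in this patch (Ctx and F are assumed to be an existing LLVMContext and Function*):

    // Hedged sketch, not part of the patch: attach and re-read the attribute.
    llvm::AttributeSet AS = F->getAttributes().addDereferenceableOrNullAttr(
        Ctx, /*Index=*/1, /*Bytes=*/16);      // index 1 = first parameter
    F->setAttributes(AS);

    llvm::Attribute A =
        llvm::Attribute::getWithDereferenceableOrNullBytes(Ctx, 16);
    assert(A.getDereferenceableOrNullBytes() == 16);

Semantically, dereferenceable_or_null(N) promises the pointer is either null or points at N dereferenceable bytes, which is why it joins dereferenceable in the typeIncompatible set for non-pointer arguments.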
if (!Alignment) @@ -1225,7 +1266,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { - if (I == Attribute::Dereferenceable) + if (I == Attribute::Dereferenceable || + I == Attribute::DereferenceableOrNull) continue; if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { Attrs[I] = true; @@ -1261,6 +1303,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { .addAttribute(Attribute::NoCapture) .addAttribute(Attribute::NonNull) .addDereferenceableAttr(1) // the int here is ignored + .addDereferenceableOrNullAttr(1) // the int here is ignored .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::StructRet) diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index d2dfeaa..bb23d2c 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -124,19 +124,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; } - case 'd': { - if (Name.startswith("dbg.declare") && F->arg_size() == 2) { - F->setName(Name + ".old"); - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_declare); - return true; - } - if (Name.startswith("dbg.value") && F->arg_size() == 3) { - F->setName(Name + ".old"); - NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); - return true; - } - break; - } case 'o': // We only need to change the name to match the mangling including the @@ -354,23 +341,6 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { return false; } -static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { - if (!DbgNode || Elt >= DbgNode->getNumOperands()) - return nullptr; - return dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)); -} - -static MetadataAsValue *getExpression(Value *VarOperand, Function *F) { - // Old-style DIVariables have an optional expression as the 8th element. - DIExpression Expr(getNodeField( - cast<MDNode>(cast<MetadataAsValue>(VarOperand)->getMetadata()), 8)); - if (!Expr) { - DIBuilder DIB(*F->getParent(), /*AllowUnresolved*/ false); - Expr = DIB.createExpression(); - } - return MetadataAsValue::get(F->getContext(), Expr); -} - // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them // to byte shuffles. static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C, @@ -745,7 +715,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } - std::string Name = CI->getName().str(); + std::string Name = CI->getName(); if (!Name.empty()) CI->setName(Name + ".old"); @@ -753,25 +723,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { default: llvm_unreachable("Unknown function for CallInst upgrade."); - // Upgrade debug intrinsics to use an additional DIExpression argument. 
- case Intrinsic::dbg_declare: { - auto NewCI = - Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1), - getExpression(CI->getArgOperand(1), F), Name); - NewCI->setDebugLoc(CI->getDebugLoc()); - CI->replaceAllUsesWith(NewCI); - CI->eraseFromParent(); - return; - } - case Intrinsic::dbg_value: { - auto NewCI = Builder.CreateCall4( - NewFn, CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - getExpression(CI->getArgOperand(2), F), Name); - NewCI->setDebugLoc(CI->getDebugLoc()); - CI->replaceAllUsesWith(NewCI); - CI->eraseFromParent(); - return; - } case Intrinsic::ctlz: case Intrinsic::cttz: assert(CI->getNumArgOperands() == 1 && diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index fe38385..23ec705 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -94,8 +94,8 @@ void BasicBlock::removeFromParent() { getParent()->getBasicBlockList().remove(this); } -void BasicBlock::eraseFromParent() { - getParent()->getBasicBlockList().erase(this); +iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() { + return getParent()->getBasicBlockList().erase(this); } /// Unlink this basic block from its current function and diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 9fef0b2..d2e0c38 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -41,7 +41,6 @@ add_llvm_library(LLVMCore Type.cpp TypeFinder.cpp Use.cpp - UseListOrder.cpp User.cpp Value.cpp ValueSymbolTable.cpp diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index d97d2c4..d3caf04 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -132,7 +132,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { if (ElTy == DPTy->getElementType()) // This GEP is inbounds because all indices are zero. - return ConstantExpr::getInBoundsGetElementPtr(V, IdxList); + return ConstantExpr::getInBoundsGetElementPtr(PTy->getElementType(), + V, IdxList); } // Handle casts from one vector constant to another. We know that the src @@ -169,7 +170,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { // be the same. Consequently, we just fold to V. return V; - if (DestTy->isFloatingPointTy()) + // See note below regarding the PPC_FP128 restriction. + if (DestTy->isFloatingPointTy() && !DestTy->isPPC_FP128Ty()) return ConstantFP::get(DestTy->getContext(), APFloat(DestTy->getFltSemantics(), CI->getValue())); @@ -179,9 +181,19 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { } // Handle ConstantFP input: FP -> Integral. - if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) + if (ConstantFP *FP = dyn_cast<ConstantFP>(V)) { + // PPC_FP128 is really the sum of two consecutive doubles, where the first + // double is always stored first in memory, regardless of the target + // endianness. The memory layout of i128, however, depends on the target + // endianness, and so we can't fold this without target endianness + // information. 
This should instead be handled by + // Analysis/ConstantFolding.cpp + if (FP->getType()->isPPC_FP128Ty()) + return nullptr; + return ConstantInt::get(FP->getContext(), FP->getValueAPF().bitcastToAPInt()); + } return nullptr; } @@ -2020,7 +2032,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (isa<UndefValue>(C)) { PointerType *Ptr = cast<PointerType>(C->getType()); - Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); + Type *Ty = GetElementPtrInst::getIndexedType( + cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace())); } @@ -2034,7 +2047,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, } if (isNull) { PointerType *Ptr = cast<PointerType>(C->getType()); - Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); + Type *Ty = GetElementPtrInst::getIndexedType( + cast<PointerType>(Ptr->getScalarType())->getElementType(), Idxs); assert(Ty && "Invalid indices for GEP!"); return ConstantPointerNull::get(PointerType::get(Ty, Ptr->getAddressSpace())); @@ -2107,10 +2121,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, NewIndices.push_back(Combined); NewIndices.append(Idxs.begin() + 1, Idxs.end()); - return - ConstantExpr::getGetElementPtr(CE->getOperand(0), NewIndices, - inBounds && - cast<GEPOperator>(CE)->isInBounds()); + return ConstantExpr::getGetElementPtr( + cast<GEPOperator>(CE)->getSourceElementType(), CE->getOperand(0), + NewIndices, inBounds && cast<GEPOperator>(CE)->isInBounds()); } } @@ -2135,8 +2148,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (SrcArrayTy && DstArrayTy && SrcArrayTy->getElementType() == DstArrayTy->getElementType() && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace()) - return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0), - Idxs, inBounds); + return ConstantExpr::getGetElementPtr( + SrcArrayTy, (Constant *)CE->getOperand(0), Idxs, inBounds); } } } @@ -2202,7 +2215,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (!NewIdxs.empty()) { for (unsigned i = 0, e = Idxs.size(); i != e; ++i) if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]); - return ConstantExpr::getGetElementPtr(C, NewIdxs, inBounds); + return ConstantExpr::getGetElementPtr(nullptr, C, NewIdxs, inBounds); } // If all indices are known integers and normalized, we can do a simple @@ -2210,7 +2223,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (!Unknown && !inBounds) if (auto *GV = dyn_cast<GlobalVariable>(C)) if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs)) - return ConstantExpr::getInBoundsGetElementPtr(C, Idxs); + return ConstantExpr::getInBoundsGetElementPtr(nullptr, C, Idxs); return nullptr; } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index e51a396..3f8d1f1 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -1252,7 +1252,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty, return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2], OnlyIfReducedTy); case Instruction::GetElementPtr: - return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1), + return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1), cast<GEPOperator>(this)->isInBounds(), OnlyIfReducedTy); case Instruction::ICmp: @@ -1925,7 +1925,7 @@ Constant *ConstantExpr::getSizeOf(Type* Ty) { // Note that a non-inbounds gep is used, as null isn't within any object. 
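The getSizeOf body that follows is the classic target-independent sizeof idiom: index one past a null T* and ptrtoint the result, the constant-expression form of (size_t)&((T*)0)[1]. From client code it looks like this (sketch; Ctx is an assumed LLVMContext):

    llvm::Type *Elts[] = {llvm::Type::getInt8Ty(Ctx),
                          llvm::Type::getInt64Ty(Ctx)};
    // Stays a ptrtoint(gep) constant expression until a DataLayout-aware
    // folder reduces it to a plain integer.
    llvm::Constant *Size =
        llvm::ConstantExpr::getSizeOf(llvm::StructType::get(Ctx, Elts));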
Constant *GEPIdx = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); + Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -1939,7 +1939,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) { Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; - Constant *GEP = getGetElementPtr(NullPtr, Indices); + Constant *GEP = getGetElementPtr(AligningTy, NullPtr, Indices); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -1957,7 +1957,7 @@ Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) { FieldNo }; Constant *GEP = getGetElementPtr( - Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); + Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx); return getPtrToInt(GEP, Type::getInt64Ty(Ty->getContext())); } @@ -2001,19 +2001,22 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2, return pImpl->ExprConstants.getOrCreate(V1->getType(), Key); } -Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs, - bool InBounds, Type *OnlyIfReducedTy) { - assert(C->getType()->isPtrOrPtrVectorTy() && - "Non-pointer type for constant GetElementPtr expression"); - +Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, + ArrayRef<Value *> Idxs, bool InBounds, + Type *OnlyIfReducedTy) { if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs)) return FC; // Fold a few common cases. + if (!Ty) + Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType(); + else + assert(Ty == + cast<PointerType>(C->getType()->getScalarType())->getElementType()); // Get the result type of the getelementptr! 
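This rework makes the GEP source element type an explicit parameter while keeping nullptr as a derive-from-the-pointer fallback, so during the migration both spellings build the same uniqued constant. Sketch, assuming a Constant *C of type i32* and a context Ctx:

    llvm::Constant *Idxs[] = {
        llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), 0)};
    llvm::Constant *G1 =
        llvm::ConstantExpr::getGetElementPtr(nullptr, C, Idxs);
    llvm::Constant *G2 =
        llvm::ConstantExpr::getGetElementPtr(llvm::Type::getInt32Ty(Ctx), C,
                                             Idxs);
    assert(G1 == G2); // constant expressions are uniqued by type and operands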
- Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs); - assert(Ty && "GEP indices invalid!"); + Type *DestTy = GetElementPtrInst::getIndexedType(Ty, Idxs); + assert(DestTy && "GEP indices invalid!"); unsigned AS = C->getType()->getPointerAddressSpace(); - Type *ReqTy = Ty->getPointerTo(AS); + Type *ReqTy = DestTy->getPointerTo(AS); if (VectorType *VecTy = dyn_cast<VectorType>(C->getType())) ReqTy = VectorType::get(ReqTy, VecTy->getNumElements()); diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 613147e..7fe7beb 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1153,8 +1153,8 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices) { ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices), NumIndices); - return wrap(ConstantExpr::getGetElementPtr(unwrap<Constant>(ConstantVal), - IdxList)); + return wrap(ConstantExpr::getGetElementPtr( + nullptr, unwrap<Constant>(ConstantVal), IdxList)); } LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, @@ -1163,7 +1163,7 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, Constant* Val = unwrap<Constant>(ConstantVal); ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices), NumIndices); - return wrap(ConstantExpr::getInBoundsGetElementPtr(Val, IdxList)); + return wrap(ConstantExpr::getInBoundsGetElementPtr(nullptr, Val, IdxList)); } LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { @@ -2181,14 +2181,13 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) { void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { MDNode *Loc = L ? cast<MDNode>(unwrap<MetadataAsValue>(L)->getMetadata()) : nullptr; - unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc)); + unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(Loc)); } LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { LLVMContext &Context = unwrap(Builder)->getContext(); return wrap(MetadataAsValue::get( - Context, - unwrap(Builder)->getCurrentDebugLocation().getAsMDNode(Context))); + Context, unwrap(Builder)->getCurrentDebugLocation().getAsMDNode())); } void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { @@ -2513,12 +2512,13 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices); - return wrap(unwrap(B)->CreateInBoundsGEP(unwrap(Pointer), IdxList, Name)); + return wrap( + unwrap(B)->CreateInBoundsGEP(nullptr, unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, unsigned Idx, const char *Name) { - return wrap(unwrap(B)->CreateStructGEP(unwrap(Pointer), Idx, Name)); + return wrap(unwrap(B)->CreateStructGEP(nullptr, unwrap(Pointer), Idx, Name)); } LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 9677de4..891fb86 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -74,8 +74,7 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { } void DIBuilder::finalize() { - DIArray Enums = getOrCreateArray(AllEnumTypes); - DIType(TempEnumTypes).replaceAllUsesWith(Enums); + TempEnumTypes->replaceAllUsesWith(MDTuple::get(VMContext, AllEnumTypes)); SmallVector<Metadata *, 16> RetainValues; // Declarations and definitions of the same type may be retained. 
Some @@ -86,28 +85,24 @@ void DIBuilder::finalize() { for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) if (RetainSet.insert(AllRetainTypes[I]).second) RetainValues.push_back(AllRetainTypes[I]); - DIArray RetainTypes = getOrCreateArray(RetainValues); - DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes); - - DIArray SPs = getOrCreateArray(AllSubprograms); - DIType(TempSubprograms).replaceAllUsesWith(SPs); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (MDNode *Temp = SP.getVariablesNodes()) { + TempRetainTypes->replaceAllUsesWith(MDTuple::get(VMContext, RetainValues)); + + MDSubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); + TempSubprograms->replaceAllUsesWith(SPs.get()); + for (auto *SP : SPs) { + if (MDTuple *Temp = SP->getVariables().get()) { const auto &PV = PreservedVariables.lookup(SP); SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end()); DIArray AV = getOrCreateArray(Variables); - DIType(Temp).replaceAllUsesWith(AV); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); } } - DIArray GVs = getOrCreateArray(AllGVs); - DIType(TempGVs).replaceAllUsesWith(GVs); + TempGVs->replaceAllUsesWith(MDTuple::get(VMContext, AllGVs)); - SmallVector<Metadata *, 16> RetainValuesI(AllImportedModules.begin(), - AllImportedModules.end()); - DIArray IMs = getOrCreateArray(RetainValuesI); - DIType(TempImportedModules).replaceAllUsesWith(IMs); + TempImportedModules->replaceAllUsesWith(MDTuple::get( + VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(), + AllImportedModules.end()))); // Now that all temp nodes have been replaced or deleted, resolve remaining // cycles. @@ -121,19 +116,16 @@ void DIBuilder::finalize() { } /// If N is compile unit return NULL otherwise return N. -static MDScope *getNonCompileUnitScope(MDNode *N) { +static MDScope *getNonCompileUnitScope(MDScope *N) { if (!N || isa<MDCompileUnit>(N)) return nullptr; return cast<MDScope>(N); } -DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, - StringRef Directory, - StringRef Producer, bool isOptimized, - StringRef Flags, unsigned RunTimeVer, - StringRef SplitName, - DebugEmissionKind Kind, - bool EmitDebugInfo) { +MDCompileUnit *DIBuilder::createCompileUnit( + unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName, + DebugEmissionKind Kind, bool EmitDebugInfo) { assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && @@ -143,18 +135,19 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, // TODO: Once we make MDCompileUnit distinct, stop using temporaries here // (just start with operands assigned to nullptr). - TempEnumTypes = MDTuple::getTemporary(VMContext, None).release(); - TempRetainTypes = MDTuple::getTemporary(VMContext, None).release(); - TempSubprograms = MDTuple::getTemporary(VMContext, None).release(); - TempGVs = MDTuple::getTemporary(VMContext, None).release(); - TempImportedModules = MDTuple::getTemporary(VMContext, None).release(); + TempEnumTypes = MDTuple::getTemporary(VMContext, None); + TempRetainTypes = MDTuple::getTemporary(VMContext, None); + TempSubprograms = MDTuple::getTemporary(VMContext, None); + TempGVs = MDTuple::getTemporary(VMContext, None); + TempImportedModules = MDTuple::getTemporary(VMContext, None); // TODO: Switch to getDistinct(). 
We never want to merge compile units based // on contents. - MDNode *CUNode = MDCompileUnit::get( + MDCompileUnit *CUNode = MDCompileUnit::get( VMContext, Lang, MDFile::get(VMContext, Filename, Directory), Producer, - isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes, - TempRetainTypes, TempSubprograms, TempGVs, TempImportedModules); + isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes.get(), + TempRetainTypes.get(), TempSubprograms.get(), TempGVs.get(), + TempImportedModules.get()); // Create a named metadata so that it is easier to find cu in a module. // Note that we only generate this when the caller wants to actually @@ -167,141 +160,136 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, } trackIfUnresolved(CUNode); - return DICompileUnit(CUNode); + return CUNode; } -static DIImportedEntity -createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context, +static MDImportedEntity* +createImportedModule(LLVMContext &C, dwarf::Tag Tag, MDScope* Context, Metadata *NS, unsigned Line, StringRef Name, SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) { - DIImportedEntity M = MDImportedEntity::get(C, Tag, Context, NS, Line, Name); - assert(M.Verify() && "Imported module should be valid"); - AllImportedModules.emplace_back(M.get()); + auto *M = + MDImportedEntity::get(C, Tag, Context, DebugNodeRef(NS), Line, Name); + AllImportedModules.emplace_back(M); return M; } -DIImportedEntity DIBuilder::createImportedModule(DIScope Context, - DINameSpace NS, +MDImportedEntity* DIBuilder::createImportedModule(MDScope* Context, + MDNamespace* NS, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, Context, NS, Line, StringRef(), AllImportedModules); } -DIImportedEntity DIBuilder::createImportedModule(DIScope Context, - DIImportedEntity NS, +MDImportedEntity* DIBuilder::createImportedModule(MDScope* Context, + MDImportedEntity* NS, unsigned Line) { return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, Context, NS, Line, StringRef(), AllImportedModules); } -DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, - DIDescriptor Decl, - unsigned Line, StringRef Name) { +MDImportedEntity *DIBuilder::createImportedDeclaration(MDScope *Context, + DebugNode *Decl, + unsigned Line, + StringRef Name) { // Make sure to use the unique identifier based metadata reference for // types that have one. - Metadata *V = - Decl.isType() ? 
static_cast<Metadata *>(DIType(Decl).getRef()) : Decl; return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, - Context, V, Line, Name, + Context, DebugNodeRef::get(Decl), Line, Name, AllImportedModules); } -DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, - DIImportedEntity Imp, - unsigned Line, StringRef Name) { - return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, - Context, Imp, Line, Name, AllImportedModules); -} - -DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { +MDFile* DIBuilder::createFile(StringRef Filename, StringRef Directory) { return MDFile::get(VMContext, Filename, Directory); } -DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) { +MDEnumerator *DIBuilder::createEnumerator(StringRef Name, int64_t Val) { assert(!Name.empty() && "Unable to create enumerator without name"); return MDEnumerator::get(VMContext, Val, Name); } -DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) { +MDBasicType *DIBuilder::createUnspecifiedType(StringRef Name) { assert(!Name.empty() && "Unable to create type without name"); return MDBasicType::get(VMContext, dwarf::DW_TAG_unspecified_type, Name); } -DIBasicType DIBuilder::createNullPtrType() { +MDBasicType *DIBuilder::createNullPtrType() { return createUnspecifiedType("decltype(nullptr)"); } -DIBasicType -DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Encoding) { +MDBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, + uint64_t AlignInBits, + unsigned Encoding) { assert(!Name.empty() && "Unable to create type without name"); return MDBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits, AlignInBits, Encoding); } -DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { +MDDerivedType *DIBuilder::createQualifiedType(unsigned Tag, MDType *FromTy) { return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, - FromTy.getRef(), 0, 0, 0, 0); + MDTypeRef::get(FromTy), 0, 0, 0, 0); } -DIDerivedType -DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, - uint64_t AlignInBits, StringRef Name) { +MDDerivedType *DIBuilder::createPointerType(MDType *PointeeTy, + uint64_t SizeInBits, + uint64_t AlignInBits, + StringRef Name) { // FIXME: Why is there a name here? 
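From this point the builder returns concrete MD* node pointers rather than DIDescriptor value wrappers, and type cross-references go through MDTypeRef::get. A small setup sketch under the new signatures (M is an assumed existing Module; later sketches in this section reuse these names):

    #include "llvm/IR/DIBuilder.h"
    using namespace llvm;

    DIBuilder DIB(M);
    MDCompileUnit *CU = DIB.createCompileUnit(
        dwarf::DW_LANG_C99, "t.c", "/tmp", "clang", /*isOptimized=*/false,
        /*Flags=*/"", /*RunTimeVer=*/0);
    MDFile *File = DIB.createFile("t.c", "/tmp");
    MDBasicType *IntTy =
        DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
    MDDerivedType *PtrTy = DIB.createPointerType(IntTy, 64, 64);
    // isa<>/cast<> on the returned nodes replace the old isType()-style
    // predicates on DIDescriptor.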
return MDDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name, - nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits, - AlignInBits, 0, 0); + nullptr, 0, nullptr, MDTypeRef::get(PointeeTy), + SizeInBits, AlignInBits, 0, 0); } -DIDerivedType -DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base, - uint64_t SizeInBits, uint64_t AlignInBits) { +MDDerivedType *DIBuilder::createMemberPointerType(MDType *PointeeTy, + MDType *Base, + uint64_t SizeInBits, + uint64_t AlignInBits) { return MDDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "", - nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits, - AlignInBits, 0, 0, Base.getRef()); + nullptr, 0, nullptr, MDTypeRef::get(PointeeTy), + SizeInBits, AlignInBits, 0, 0, MDTypeRef::get(Base)); } -DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { - assert(RTy.isType() && "Unable to create reference type"); +MDDerivedType *DIBuilder::createReferenceType(unsigned Tag, MDType *RTy) { + assert(RTy && "Unable to create reference type"); return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, - RTy.getRef(), 0, 0, 0, 0); + MDTypeRef::get(RTy), 0, 0, 0, 0); } -DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo, DIDescriptor Context) { - return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, - File.getFileNode(), LineNo, - DIScope(getNonCompileUnitScope(Context)).getRef(), - Ty.getRef(), 0, 0, 0, 0); +MDDerivedType *DIBuilder::createTypedef(MDType *Ty, StringRef Name, + MDFile *File, unsigned LineNo, + MDScope *Context) { + return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File, + LineNo, + MDScopeRef::get(getNonCompileUnitScope(Context)), + MDTypeRef::get(Ty), 0, 0, 0, 0); } -DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { - // typedefs are encoded in DIDerivedType format. 
- assert(Ty.isType() && "Invalid type!"); - assert(FriendTy.isType() && "Invalid friend type!"); +MDDerivedType *DIBuilder::createFriend(MDType *Ty, MDType *FriendTy) { + assert(Ty && "Invalid type!"); + assert(FriendTy && "Invalid friend type!"); return MDDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, - Ty.getRef(), FriendTy.getRef(), 0, 0, 0, 0); + MDTypeRef::get(Ty), MDTypeRef::get(FriendTy), 0, 0, + 0, 0); } -DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, - uint64_t BaseOffset, - unsigned Flags) { - assert(Ty.isType() && "Unable to create inheritance"); +MDDerivedType *DIBuilder::createInheritance(MDType *Ty, MDType *BaseTy, + uint64_t BaseOffset, + unsigned Flags) { + assert(Ty && "Unable to create inheritance"); return MDDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr, - 0, Ty.getRef(), BaseTy.getRef(), 0, 0, BaseOffset, - Flags); + 0, MDTypeRef::get(Ty), MDTypeRef::get(BaseTy), 0, 0, + BaseOffset, Flags); } -DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty) { +MDDerivedType *DIBuilder::createMemberType(MDScope *Scope, StringRef Name, + MDFile *File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, MDType *Ty) { return MDDerivedType::get( VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), SizeInBits, - AlignInBits, OffsetInBits, Flags); + MDScopeRef::get(getNonCompileUnitScope(Scope)), MDTypeRef::get(Ty), + SizeInBits, AlignInBits, OffsetInBits, Flags); } static ConstantAsMetadata *getConstantOrNull(Constant *C) { @@ -310,135 +298,124 @@ static ConstantAsMetadata *getConstantOrNull(Constant *C) { return nullptr; } -DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope, - StringRef Name, DIFile File, - unsigned LineNumber, DIType Ty, - unsigned Flags, - llvm::Constant *Val) { - // TAG_member is encoded in DIDerivedType format. 
- Flags |= DIDescriptor::FlagStaticMember; +MDDerivedType *DIBuilder::createStaticMemberType(MDScope *Scope, StringRef Name, + MDFile *File, + unsigned LineNumber, + MDType *Ty, unsigned Flags, + llvm::Constant *Val) { + Flags |= DebugNode::FlagStaticMember; return MDDerivedType::get( VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), 0, 0, 0, - Flags, getConstantOrNull(Val)); + MDScopeRef::get(getNonCompileUnitScope(Scope)), MDTypeRef::get(Ty), 0, 0, + 0, Flags, getConstantOrNull(Val)); } -DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty, MDNode *PropertyNode) { - return MDDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File, - LineNumber, getNonCompileUnitScope(File), - Ty.getRef(), SizeInBits, AlignInBits, OffsetInBits, - Flags, PropertyNode); +MDDerivedType *DIBuilder::createObjCIVar(StringRef Name, MDFile *File, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + MDType *Ty, MDNode *PropertyNode) { + return MDDerivedType::get( + VMContext, dwarf::DW_TAG_member, Name, File, LineNumber, + MDScopeRef::get(getNonCompileUnitScope(File)), MDTypeRef::get(Ty), + SizeInBits, AlignInBits, OffsetInBits, Flags, PropertyNode); } -DIObjCProperty -DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber, +MDObjCProperty * +DIBuilder::createObjCProperty(StringRef Name, MDFile *File, unsigned LineNumber, StringRef GetterName, StringRef SetterName, - unsigned PropertyAttributes, DIType Ty) { + unsigned PropertyAttributes, MDType *Ty) { return MDObjCProperty::get(VMContext, Name, File, LineNumber, GetterName, SetterName, PropertyAttributes, Ty); } -DITemplateTypeParameter -DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, - DIType Ty) { - assert(!DIScope(getNonCompileUnitScope(Context)).getRef() && - "Expected compile unit"); - return MDTemplateTypeParameter::get(VMContext, Name, Ty.getRef()); +MDTemplateTypeParameter * +DIBuilder::createTemplateTypeParameter(MDScope *Context, StringRef Name, + MDType *Ty) { + assert((!Context || isa<MDCompileUnit>(Context)) && "Expected compile unit"); + return MDTemplateTypeParameter::get(VMContext, Name, MDTypeRef::get(Ty)); } -static DITemplateValueParameter +static MDTemplateValueParameter * createTemplateValueParameterHelper(LLVMContext &VMContext, unsigned Tag, - DIDescriptor Context, StringRef Name, - DIType Ty, Metadata *MD) { - assert(!DIScope(getNonCompileUnitScope(Context)).getRef() && - "Expected compile unit"); - return MDTemplateValueParameter::get(VMContext, Tag, Name, Ty.getRef(), MD); + MDScope *Context, StringRef Name, MDType *Ty, + Metadata *MD) { + assert((!Context || isa<MDCompileUnit>(Context)) && "Expected compile unit"); + return MDTemplateValueParameter::get(VMContext, Tag, Name, MDTypeRef::get(Ty), + MD); } -DITemplateValueParameter -DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, - DIType Ty, Constant *Val) { +MDTemplateValueParameter * +DIBuilder::createTemplateValueParameter(MDScope *Context, StringRef Name, + MDType *Ty, Constant *Val) { return createTemplateValueParameterHelper( VMContext, dwarf::DW_TAG_template_value_parameter, Context, Name, Ty, getConstantOrNull(Val)); } -DITemplateValueParameter -DIBuilder::createTemplateTemplateParameter(DIDescriptor Context, StringRef Name, - 
DIType Ty, StringRef Val) { +MDTemplateValueParameter * +DIBuilder::createTemplateTemplateParameter(MDScope *Context, StringRef Name, + MDType *Ty, StringRef Val) { return createTemplateValueParameterHelper( VMContext, dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty, MDString::get(VMContext, Val)); } -DITemplateValueParameter -DIBuilder::createTemplateParameterPack(DIDescriptor Context, StringRef Name, - DIType Ty, DIArray Val) { +MDTemplateValueParameter * +DIBuilder::createTemplateParameterPack(MDScope *Context, StringRef Name, + MDType *Ty, DIArray Val) { return createTemplateValueParameterHelper( VMContext, dwarf::DW_TAG_GNU_template_parameter_pack, Context, Name, Ty, - Val); + Val.get()); } -DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, - unsigned Flags, DIType DerivedFrom, - DIArray Elements, - DIType VTableHolder, - MDNode *TemplateParams, - StringRef UniqueIdentifier) { - assert((!Context || Context.isScope() || Context.isType()) && +MDCompositeType *DIBuilder::createClassType( + MDScope *Context, StringRef Name, MDFile *File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, MDType *DerivedFrom, DIArray Elements, MDType *VTableHolder, + MDNode *TemplateParams, StringRef UniqueIdentifier) { + assert((!Context || isa<MDScope>(Context)) && "createClassType should be called with a valid Context"); - // TAG_class_type is encoded in DICompositeType format. - DICompositeType R = MDCompositeType::get( + + auto *R = MDCompositeType::get( VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(), - SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, 0, - VTableHolder.getRef(), TemplateParams, UniqueIdentifier); + MDScopeRef::get(getNonCompileUnitScope(Context)), + MDTypeRef::get(DerivedFrom), SizeInBits, AlignInBits, OffsetInBits, Flags, + Elements, 0, MDTypeRef::get(VTableHolder), + cast_or_null<MDTuple>(TemplateParams), UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); return R; } -DICompositeType DIBuilder::createStructType(DIDescriptor Context, - StringRef Name, DIFile File, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - unsigned Flags, DIType DerivedFrom, - DIArray Elements, - unsigned RunTimeLang, - DIType VTableHolder, - StringRef UniqueIdentifier) { - DICompositeType R = MDCompositeType::get( +MDCompositeType *DIBuilder::createStructType( + MDScope *Context, StringRef Name, MDFile *File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, + MDType *DerivedFrom, DIArray Elements, unsigned RunTimeLang, + MDType *VTableHolder, StringRef UniqueIdentifier) { + auto *R = MDCompositeType::get( VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(), - SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang, - VTableHolder.getRef(), nullptr, UniqueIdentifier); + MDScopeRef::get(getNonCompileUnitScope(Context)), + MDTypeRef::get(DerivedFrom), SizeInBits, AlignInBits, 0, Flags, Elements, + RunTimeLang, MDTypeRef::get(VTableHolder), nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(R); trackIfUnresolved(R); return R; } -DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, - DIFile 
File, unsigned LineNumber, +MDCompositeType* DIBuilder::createUnionType(MDScope * Scope, StringRef Name, + MDFile* File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, unsigned RunTimeLang, StringRef UniqueIdentifier) { - DICompositeType R = MDCompositeType::get( + auto *R = MDCompositeType::get( VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, + MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang, nullptr, nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) @@ -447,21 +424,21 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, return R; } -DISubroutineType DIBuilder::createSubroutineType(DIFile File, - DITypeArray ParameterTypes, - unsigned Flags) { +MDSubroutineType *DIBuilder::createSubroutineType(MDFile *File, + DITypeArray ParameterTypes, + unsigned Flags) { return MDSubroutineType::get(VMContext, Flags, ParameterTypes); } -DICompositeType DIBuilder::createEnumerationType( - DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, +MDCompositeType *DIBuilder::createEnumerationType( + MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, - DIType UnderlyingType, StringRef UniqueIdentifier) { - DICompositeType CTy = MDCompositeType::get( + MDType *UnderlyingType, StringRef UniqueIdentifier) { + auto *CTy = MDCompositeType::get( VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber, - DIScope(getNonCompileUnitScope(Scope)).getRef(), UnderlyingType.getRef(), - SizeInBits, AlignInBits, 0, 0, Elements, 0, nullptr, nullptr, - UniqueIdentifier); + MDScopeRef::get(getNonCompileUnitScope(Scope)), + MDTypeRef::get(UnderlyingType), SizeInBits, AlignInBits, 0, 0, Elements, + 0, nullptr, nullptr, UniqueIdentifier); AllEnumTypes.push_back(CTy); if (!UniqueIdentifier.empty()) retainType(CTy); @@ -469,63 +446,66 @@ DICompositeType DIBuilder::createEnumerationType( return CTy; } -DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { +MDCompositeType *DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, + MDType *Ty, DIArray Subscripts) { auto *R = MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", - nullptr, 0, nullptr, Ty.getRef(), Size, + nullptr, 0, nullptr, MDTypeRef::get(Ty), Size, AlignInBits, 0, 0, Subscripts, 0, nullptr); trackIfUnresolved(R); return R; } -DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { - auto *R = MDCompositeType::get( - VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty.getRef(), - Size, AlignInBits, 0, DIType::FlagVector, Subscripts, 0, nullptr); +MDCompositeType *DIBuilder::createVectorType(uint64_t Size, + uint64_t AlignInBits, MDType *Ty, + DIArray Subscripts) { + auto *R = + MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, + nullptr, MDTypeRef::get(Ty), Size, AlignInBits, 0, + DebugNode::FlagVector, Subscripts, 0, nullptr); trackIfUnresolved(R); return R; } -static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty, - unsigned FlagsToSet) { - TempMDType NewTy = cast<MDType>(static_cast<MDNode *>(Ty))->clone(); +static MDType *createTypeWithFlags(LLVMContext &Context, MDType *Ty, + unsigned FlagsToSet) { + auto NewTy = Ty->clone(); 
NewTy->setFlags(NewTy->getFlags() | FlagsToSet); return MDNode::replaceWithUniqued(std::move(NewTy)); } -DIType DIBuilder::createArtificialType(DIType Ty) { +MDType *DIBuilder::createArtificialType(MDType *Ty) { // FIXME: Restrict this to the nodes where it's valid. - if (Ty.isArtificial()) + if (Ty->isArtificial()) return Ty; - return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial); + return createTypeWithFlags(VMContext, Ty, DebugNode::FlagArtificial); } -DIType DIBuilder::createObjectPointerType(DIType Ty) { +MDType *DIBuilder::createObjectPointerType(MDType *Ty) { // FIXME: Restrict this to the nodes where it's valid. - if (Ty.isObjectPointer()) + if (Ty->isObjectPointer()) return Ty; - unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial; + unsigned Flags = DebugNode::FlagObjectPointer | DebugNode::FlagArtificial; return createTypeWithFlags(VMContext, Ty, Flags); } -void DIBuilder::retainType(DIType T) { AllRetainTypes.emplace_back(T); } - -DIBasicType DIBuilder::createUnspecifiedParameter() { - return DIBasicType(); +void DIBuilder::retainType(MDType *T) { + assert(T && "Expected non-null type"); + AllRetainTypes.emplace_back(T); } -DICompositeType -DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, - DIFile F, unsigned Line, unsigned RuntimeLang, +MDBasicType *DIBuilder::createUnspecifiedParameter() { return nullptr; } + +MDCompositeType* +DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, MDScope * Scope, + MDFile* F, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, StringRef UniqueIdentifier) { // FIXME: Define in terms of createReplaceableForwardDecl() by calling // replaceWithUniqued(). - DICompositeType RetTy = MDCompositeType::get( - VMContext, Tag, Name, F.getFileNode(), Line, - DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, - AlignInBits, 0, DIDescriptor::FlagFwdDecl, nullptr, RuntimeLang, nullptr, + auto *RetTy = MDCompositeType::get( + VMContext, Tag, Name, F, Line, + MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, SizeInBits, + AlignInBits, 0, DebugNode::FlagFwdDecl, nullptr, RuntimeLang, nullptr, nullptr, UniqueIdentifier); if (!UniqueIdentifier.empty()) retainType(RetTy); @@ -533,16 +513,15 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, return RetTy; } -DICompositeType DIBuilder::createReplaceableCompositeType( - unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line, +MDCompositeType* DIBuilder::createReplaceableCompositeType( + unsigned Tag, StringRef Name, MDScope * Scope, MDFile* F, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, StringRef UniqueIdentifier) { - DICompositeType RetTy = - MDCompositeType::getTemporary( - VMContext, Tag, Name, F.getFileNode(), Line, - DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits, - AlignInBits, 0, Flags, nullptr, RuntimeLang, - nullptr, nullptr, UniqueIdentifier).release(); + auto *RetTy = MDCompositeType::getTemporary( + VMContext, Tag, Name, F, Line, + MDScopeRef::get(getNonCompileUnitScope(Scope)), nullptr, + SizeInBits, AlignInBits, 0, Flags, nullptr, RuntimeLang, + nullptr, nullptr, UniqueIdentifier).release(); if (!UniqueIdentifier.empty()) retainType(RetTy); trackIfUnresolved(RetTy); @@ -550,102 +529,102 @@ DICompositeType DIBuilder::createReplaceableCompositeType( } DIArray DIBuilder::getOrCreateArray(ArrayRef<Metadata *> Elements) { - return DIArray(MDNode::get(VMContext, 
Elements)); + return MDTuple::get(VMContext, Elements); } DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) { SmallVector<llvm::Metadata *, 16> Elts; for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] && isa<MDNode>(Elements[i])) - Elts.push_back(DIType(cast<MDNode>(Elements[i])).getRef()); + Elts.push_back(MDTypeRef::get(cast<MDType>(Elements[i]))); else Elts.push_back(Elements[i]); } return DITypeArray(MDNode::get(VMContext, Elts)); } -DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { +MDSubrange *DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { return MDSubrange::get(VMContext, Count, Lo); } -static void checkGlobalVariableScope(DIDescriptor Context) { - MDNode *TheCtx = getNonCompileUnitScope(Context); - if (DIScope(TheCtx).isCompositeType()) { - assert(!DICompositeType(TheCtx).getIdentifier() && +static void checkGlobalVariableScope(MDScope * Context) { +#ifndef NDEBUG + if (auto *CT = + dyn_cast_or_null<MDCompositeType>(getNonCompileUnitScope(Context))) + assert(CT->getIdentifier().empty() && "Context of a global variable should not be a type with identifier"); - } +#endif } -DIGlobalVariable DIBuilder::createGlobalVariable( - DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, - unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val, +MDGlobalVariable *DIBuilder::createGlobalVariable( + MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *F, + unsigned LineNumber, MDType *Ty, bool isLocalToUnit, Constant *Val, MDNode *Decl) { checkGlobalVariableScope(Context); - auto *N = MDGlobalVariable::get(VMContext, Context, Name, LinkageName, F, - LineNumber, Ty, isLocalToUnit, true, - getConstantOrNull(Val), Decl); + auto *N = MDGlobalVariable::get(VMContext, cast_or_null<MDScope>(Context), + Name, LinkageName, F, LineNumber, + MDTypeRef::get(Ty), isLocalToUnit, true, Val, + cast_or_null<MDDerivedType>(Decl)); AllGVs.push_back(N); return N; } -DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl( - DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, - unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val, +MDGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl( + MDScope *Context, StringRef Name, StringRef LinkageName, MDFile *F, + unsigned LineNumber, MDType *Ty, bool isLocalToUnit, Constant *Val, MDNode *Decl) { checkGlobalVariableScope(Context); - return MDGlobalVariable::getTemporary(VMContext, Context, Name, LinkageName, - F, LineNumber, Ty, isLocalToUnit, false, - getConstantOrNull(Val), Decl).release(); + return MDGlobalVariable::getTemporary( + VMContext, cast_or_null<MDScope>(Context), Name, LinkageName, F, + LineNumber, MDTypeRef::get(Ty), isLocalToUnit, false, Val, + cast_or_null<MDDerivedType>(Decl)) + .release(); } -DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, - StringRef Name, DIFile File, - unsigned LineNo, DITypeRef Ty, - bool AlwaysPreserve, unsigned Flags, - unsigned ArgNo) { +MDLocalVariable *DIBuilder::createLocalVariable( + unsigned Tag, MDScope *Scope, StringRef Name, MDFile *File, unsigned LineNo, + MDType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { // FIXME: Why getNonCompileUnitScope()? // FIXME: Why is "!Context" okay here? // FIXME: WHy doesn't this check for a subprogram or lexical block (AFAICT // the only valid scopes)? 
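Variable construction follows suit; note that createLocalVariable still takes the DWARF tag directly. Continuing the earlier sketch (SP is an MDSubprogram for the enclosing function, created as in the function sketch further down):

    MDGlobalVariable *GV = DIB.createGlobalVariable(
        CU, "g", /*LinkageName=*/"g", File, /*LineNumber=*/1, IntTy,
        /*isLocalToUnit=*/false, /*Val=*/nullptr);
    MDLocalVariable *LV = DIB.createLocalVariable(
        dwarf::DW_TAG_auto_variable, SP, "x", File, /*LineNo=*/7, IntTy);
    // A local's scope must resolve to a subprogram or lexical block; a
    // compile unit is filtered out by getNonCompileUnitScope() above.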
- DIDescriptor Context(getNonCompileUnitScope(Scope)); - assert((!Context || Context.isScope()) && - "createLocalVariable should be called with a valid Context"); + MDScope* Context = getNonCompileUnitScope(Scope); - auto *Node = - MDLocalVariable::get(VMContext, Tag, getNonCompileUnitScope(Scope), Name, - File, LineNo, Ty, ArgNo, Flags); + auto *Node = MDLocalVariable::get( + VMContext, Tag, cast_or_null<MDLocalScope>(Context), Name, File, LineNo, + MDTypeRef::get(Ty), ArgNo, Flags); if (AlwaysPreserve) { // The optimizer may remove local variable. If there is an interest // to preserve variable info in such situation then stash it in a // named mdnode. - DISubprogram Fn(getDISubprogram(Scope)); + MDSubprogram *Fn = getDISubprogram(Scope); assert(Fn && "Missing subprogram for local variable"); PreservedVariables[Fn].emplace_back(Node); } return Node; } -DIExpression DIBuilder::createExpression(ArrayRef<uint64_t> Addr) { +MDExpression* DIBuilder::createExpression(ArrayRef<uint64_t> Addr) { return MDExpression::get(VMContext, Addr); } -DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) { +MDExpression* DIBuilder::createExpression(ArrayRef<int64_t> Signed) { // TODO: Remove the callers of this signed version and delete. SmallVector<uint64_t, 8> Addr(Signed.begin(), Signed.end()); return createExpression(Addr); } -DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBytes, +MDExpression* DIBuilder::createBitPieceExpression(unsigned OffsetInBytes, unsigned SizeInBytes) { uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes}; return MDExpression::get(VMContext, Addr); } -DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name, - StringRef LinkageName, DIFile File, - unsigned LineNo, DICompositeType Ty, +MDSubprogram* DIBuilder::createFunction(DIScopeRef Context, StringRef Name, + StringRef LinkageName, MDFile* File, + unsigned LineNo, MDSubroutineType* Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, @@ -658,20 +637,21 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name, Flags, isOptimized, Fn, TParams, Decl); } -DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile File, - unsigned LineNo, DICompositeType Ty, +MDSubprogram* DIBuilder::createFunction(MDScope * Context, StringRef Name, + StringRef LinkageName, MDFile* File, + unsigned LineNo, MDSubroutineType* Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl) { - assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && "function types should be subroutines"); auto *Node = MDSubprogram::get( - VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name, - LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, isDefinition, - ScopeLine, nullptr, 0, 0, Flags, isOptimized, getConstantOrNull(Fn), - TParams, Decl, MDNode::getTemporary(VMContext, None).release()); + VMContext, MDScopeRef::get(getNonCompileUnitScope(Context)), Name, + LinkageName, File, LineNo, Ty, + isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized, + Fn, cast_or_null<MDTuple>(TParams), cast_or_null<MDSubprogram>(Decl), + MDTuple::getTemporary(VMContext, None).release()); if (isDefinition) AllSubprograms.push_back(Node); @@ -679,78 +659,64 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, 
StringRef Name, return Node; } -DISubprogram -DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile File, - unsigned LineNo, DICompositeType Ty, +MDSubprogram* +DIBuilder::createTempFunctionFwdDecl(MDScope * Context, StringRef Name, + StringRef LinkageName, MDFile* File, + unsigned LineNo, MDSubroutineType* Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl) { return MDSubprogram::getTemporary( - VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name, - LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, - isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized, - getConstantOrNull(Fn), TParams, Decl, nullptr).release(); -} - -DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, - unsigned LineNo, DICompositeType Ty, - bool isLocalToUnit, bool isDefinition, - unsigned VK, unsigned VIndex, - DIType VTableHolder, unsigned Flags, - bool isOptimized, Function *Fn, - MDNode *TParam) { - assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + VMContext, MDScopeRef::get(getNonCompileUnitScope(Context)), Name, + LinkageName, File, LineNo, Ty, + isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, Flags, + isOptimized, Fn, cast_or_null<MDTuple>(TParams), + cast_or_null<MDSubprogram>(Decl), nullptr).release(); +} + +MDSubprogram * +DIBuilder::createMethod(MDScope *Context, StringRef Name, StringRef LinkageName, + MDFile *F, unsigned LineNo, MDSubroutineType *Ty, + bool isLocalToUnit, bool isDefinition, unsigned VK, + unsigned VIndex, MDType *VTableHolder, unsigned Flags, + bool isOptimized, Function *Fn, MDNode *TParam) { + assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && "function types should be subroutines"); assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); // FIXME: Do we want to use different scope/lines? 
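The function entry points keep their shape minus the wrappers. Sketch with the same assumed objects as before:

    MDSubroutineType *FnTy =
        DIB.createSubroutineType(File, DIB.getOrCreateTypeArray(None));
    MDSubprogram *SP = DIB.createFunction(
        CU, "foo", /*LinkageName=*/"foo", File, /*LineNo=*/3, FnTy,
        /*isLocalToUnit=*/false, /*isDefinition=*/true, /*ScopeLine=*/3);
    // isDefinition=true appends SP to AllSubprograms so finalize() can
    // splice it into the compile unit's subprogram list.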
- auto *Node = MDSubprogram::get( - VMContext, DIScope(Context).getRef(), Name, LinkageName, F.getFileNode(), - LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder.getRef(), - VK, VIndex, Flags, isOptimized, getConstantOrNull(Fn), TParam, nullptr, - nullptr); + auto *SP = MDSubprogram::get( + VMContext, MDScopeRef::get(cast<MDScope>(Context)), Name, LinkageName, F, + LineNo, Ty, isLocalToUnit, isDefinition, LineNo, + MDTypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn, + cast_or_null<MDTuple>(TParam), nullptr, nullptr); if (isDefinition) - AllSubprograms.push_back(Node); - DISubprogram S(Node); - assert(S.isSubprogram() && "createMethod should return a valid DISubprogram"); - trackIfUnresolved(S); - return S; -} - -DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNo) { - DINameSpace R = MDNamespace::get(VMContext, getNonCompileUnitScope(Scope), - File.getFileNode(), Name, LineNo); - assert(R.Verify() && - "createNameSpace should return a verifiable DINameSpace"); - return R; + AllSubprograms.push_back(SP); + trackIfUnresolved(SP); + return SP; +} + +MDNamespace* DIBuilder::createNameSpace(MDScope * Scope, StringRef Name, + MDFile* File, unsigned LineNo) { + return MDNamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name, + LineNo); } -DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, - DIFile File, +MDLexicalBlockFile* DIBuilder::createLexicalBlockFile(MDScope * Scope, + MDFile* File, unsigned Discriminator) { - DILexicalBlockFile R = MDLexicalBlockFile::get( - VMContext, Scope, File.getFileNode(), Discriminator); - assert( - R.Verify() && - "createLexicalBlockFile should return a verifiable DILexicalBlockFile"); - return R; + return MDLexicalBlockFile::get(VMContext, Scope, File, Discriminator); } -DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, +MDLexicalBlock* DIBuilder::createLexicalBlock(MDScope * Scope, MDFile* File, unsigned Line, unsigned Col) { // Make these distinct, to avoid merging two lexical blocks on the same // file/line/column. 
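getDistinct is the load-bearing detail in createLexicalBlock below: uniqued nodes with identical operands collapse into a single node, while distinct nodes never merge. Illustration (Ctx, SP, File assumed as above):

    auto *BlkA =
        MDLexicalBlock::getDistinct(Ctx, SP, File, /*Line=*/10, /*Col=*/1);
    auto *BlkB =
        MDLexicalBlock::getDistinct(Ctx, SP, File, /*Line=*/10, /*Col=*/1);
    assert(BlkA != BlkB); // same file/line/column, still two blocks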
- DILexicalBlock R = MDLexicalBlock::getDistinct( - VMContext, getNonCompileUnitScope(Scope), File.getFileNode(), Line, Col); - assert(R.Verify() && - "createLexicalBlock should return a verifiable DILexicalBlock"); - return R; + return MDLexicalBlock::getDistinct(VMContext, getNonCompileUnitScope(Scope), + File, Line, Col); } static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) { @@ -758,11 +724,19 @@ static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) { return MetadataAsValue::get(VMContext, ValueAsMetadata::get(V)); } -Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, - DIExpression Expr, +static Instruction *withDebugLoc(Instruction *I, const MDLocation *DL) { + I->setDebugLoc(const_cast<MDLocation *>(DL)); + return I; +} + +Instruction *DIBuilder::insertDeclare(Value *Storage, MDLocalVariable* VarInfo, + MDExpression* Expr, const MDLocation *DL, Instruction *InsertBefore) { - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.declare"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.declare"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); @@ -771,14 +745,17 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, Storage), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return CallInst::Create(DeclareFn, Args, "", InsertBefore); + return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertBefore), DL); } -Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, - DIExpression Expr, +Instruction *DIBuilder::insertDeclare(Value *Storage, MDLocalVariable* VarInfo, + MDExpression* Expr, const MDLocation *DL, BasicBlock *InsertAtEnd) { - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.declare"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.declare"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); @@ -791,18 +768,21 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, // If this block already has a terminator then insert this intrinsic // before the terminator. 
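The insert* entry points now take the MDLocation up front and stamp it onto the created intrinsic via withDebugLoc, so a declare can no longer be emitted without a location whose scope agrees with the variable's subprogram. Usage sketch (Alloca, LV, SP, and a basic block BB are assumed; LV as created earlier):

    DIB.insertDeclare(Alloca, LV, DIB.createExpression(),
                      MDLocation::get(M.getContext(), /*Line=*/7, /*Col=*/0,
                                      SP),
                      BB);
    // The added asserts compare DL->getScope()->getSubprogram() with the
    // variable's subprogram, catching scope mismatches at creation time.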
if (TerminatorInst *T = InsertAtEnd->getTerminator()) - return CallInst::Create(DeclareFn, Args, "", T); - else - return CallInst::Create(DeclareFn, Args, "", InsertAtEnd); + return withDebugLoc(CallInst::Create(DeclareFn, Args, "", T), DL); + return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertAtEnd), DL); } Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable VarInfo, - DIExpression Expr, + MDLocalVariable* VarInfo, + MDExpression* Expr, + const MDLocation *DL, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.value"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.value"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); @@ -812,16 +792,20 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, ConstantInt::get(Type::getInt64Ty(VMContext), Offset), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return CallInst::Create(ValueFn, Args, "", InsertBefore); + return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertBefore), DL); } Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable VarInfo, - DIExpression Expr, + MDLocalVariable* VarInfo, + MDExpression* Expr, + const MDLocation *DL, BasicBlock *InsertAtEnd) { assert(V && "no value passed to dbg.value"); - assert(VarInfo.isVariable() && - "empty or invalid DIVariable passed to dbg.value"); + assert(VarInfo && "empty or invalid MDLocalVariable* passed to dbg.value"); + assert(DL && "Expected debug loc"); + assert(DL->getScope()->getSubprogram() == + VarInfo->getScope()->getSubprogram() && + "Expected matching subprograms"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); @@ -831,11 +815,16 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, ConstantInt::get(Type::getInt64Ty(VMContext), Offset), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return CallInst::Create(ValueFn, Args, "", InsertAtEnd); + + return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertAtEnd), DL); } -void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHolder) { - T.setContainingType(VTableHolder); +void DIBuilder::replaceVTableHolder(MDCompositeType* &T, MDCompositeType* VTableHolder) { + { + TypedTrackingMDRef<MDCompositeType> N(T); + N->replaceVTableHolder(MDTypeRef::get(VTableHolder)); + T = N.get(); + } // If this didn't create a self-reference, just return. if (T != VTableHolder) @@ -849,9 +838,16 @@ void DIBuilder::replaceVTableHolder(DICompositeType &T, DICompositeType VTableHo trackIfUnresolved(N); } -void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements, +void DIBuilder::replaceArrays(MDCompositeType* &T, DIArray Elements, DIArray TParams) { - T.setArrays(Elements, TParams); + { + TypedTrackingMDRef<MDCompositeType> N(T); + if (Elements) + N->replaceElements(Elements); + if (TParams) + N->replaceTemplateParams(MDTemplateParameterArray(TParams)); + T = N.get(); + } // If T isn't resolved, there's no problem. 
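// The TypedTrackingMDRef in replaceVTableHolder/replaceArrays guards
// against the mutation re-uniquing the node: the tracking ref follows any
// uniquing RAUW, and T is reassigned from it afterwards. The same pattern
// in isolation, assuming an existing MDCompositeType *T:
//
//   TypedTrackingMDRef<MDCompositeType> N(T);
//   N->replaceElements(Elements);  // may replace the node via uniquing
//   T = N.get();                   // observe the possibly-new pointer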
if (!T->isResolved()) @@ -861,7 +857,7 @@ void DIBuilder::replaceArrays(DICompositeType &T, DIArray Elements, // arrays explicitly if they're unresolved, or else the cycles will be // orphaned. if (Elements) - trackIfUnresolved(Elements); + trackIfUnresolved(Elements.get()); if (TParams) - trackIfUnresolved(TParams); + trackIfUnresolved(TParams.get()); } diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 9a6b953..719c28b 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -25,6 +24,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -33,612 +33,62 @@ using namespace llvm; using namespace llvm::dwarf; -//===----------------------------------------------------------------------===// -// DIDescriptor -//===----------------------------------------------------------------------===// - -unsigned DIDescriptor::getFlag(StringRef Flag) { - return StringSwitch<unsigned>(Flag) -#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME) -#include "llvm/IR/DebugInfoFlags.def" - .Default(0); -} - -const char *DIDescriptor::getFlagString(unsigned Flag) { - switch (Flag) { - default: - return ""; -#define HANDLE_DI_FLAG(ID, NAME) \ - case Flag##NAME: \ - return "DIFlag" #NAME; -#include "llvm/IR/DebugInfoFlags.def" - } -} - -unsigned DIDescriptor::splitFlags(unsigned Flags, - SmallVectorImpl<unsigned> &SplitFlags) { - // Accessibility flags need to be specially handled, since they're packed - // together. 
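// These flag helpers are not lost: the DebugInfoMetadata.cpp hunk later in
// this patch re-adds them as DebugNode::getFlag/getFlagString/splitFlags.
// Expected round-trip through those definitions:
//
//   unsigned F = DebugNode::getFlag("DIFlagPrivate");
//   const char *S = DebugNode::getFlagString(F);  // "DIFlagPrivate"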
- if (unsigned A = Flags & FlagAccessibility) { - if (A == FlagPrivate) - SplitFlags.push_back(FlagPrivate); - else if (A == FlagProtected) - SplitFlags.push_back(FlagProtected); - else - SplitFlags.push_back(FlagPublic); - Flags &= ~A; - } - -#define HANDLE_DI_FLAG(ID, NAME) \ - if (unsigned Bit = Flags & ID) { \ - SplitFlags.push_back(Bit); \ - Flags &= ~Bit; \ - } -#include "llvm/IR/DebugInfoFlags.def" - - return Flags; -} - -bool DIDescriptor::Verify() const { - return DbgNode && - (DIDerivedType(DbgNode).Verify() || - DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() || - DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() || - DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() || - DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() || - DILexicalBlock(DbgNode).Verify() || - DILexicalBlockFile(DbgNode).Verify() || - DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() || - DIObjCProperty(DbgNode).Verify() || - DITemplateTypeParameter(DbgNode).Verify() || - DITemplateValueParameter(DbgNode).Verify() || - DIImportedEntity(DbgNode).Verify()); -} - -static Metadata *getField(const MDNode *DbgNode, unsigned Elt) { - if (!DbgNode || Elt >= DbgNode->getNumOperands()) - return nullptr; - return DbgNode->getOperand(Elt); -} - -static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { - return dyn_cast_or_null<MDNode>(getField(DbgNode, Elt)); -} - -static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) { - if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt))) - return MDS->getString(); - return StringRef(); -} - -StringRef DIDescriptor::getStringField(unsigned Elt) const { - return ::getStringField(DbgNode, Elt); -} - -uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { - if (auto *C = getConstantField(Elt)) - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return CI->getZExtValue(); - - return 0; -} - -int64_t DIDescriptor::getInt64Field(unsigned Elt) const { - if (auto *C = getConstantField(Elt)) - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) - return CI->getZExtValue(); - - return 0; -} - -DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { - MDNode *Field = getNodeField(DbgNode, Elt); - return DIDescriptor(Field); -} - -GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { - return dyn_cast_or_null<GlobalVariable>(getConstantField(Elt)); -} - -Constant *DIDescriptor::getConstantField(unsigned Elt) const { - if (!DbgNode) - return nullptr; - - if (Elt < DbgNode->getNumOperands()) - if (auto *C = - dyn_cast_or_null<ConstantAsMetadata>(DbgNode->getOperand(Elt))) - return C->getValue(); - return nullptr; -} - -Function *DIDescriptor::getFunctionField(unsigned Elt) const { - return dyn_cast_or_null<Function>(getConstantField(Elt)); -} - -/// \brief Return the size reported by the variable's type. -unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) { - DIType Ty = getType().resolve(Map); - // Follow derived types until we reach a type that - // reports back a size. 
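// The deleted index-based accessors (getStringField(Elt), getNodeField,
// getConstantField, ...) give way to typed accessors on the node classes
// themselves; for example, assuming an MDSubprogram *SP:
//
//   StringRef Name = SP->getName();     // instead of getStringField(...)
//   Metadata *Type = SP->getRawType();  // instead of getNodeField(...)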
- while (Ty.isDerivedType() && !Ty.getSizeInBits()) { - DIDerivedType DT(&*Ty); - Ty = DT.getTypeDerivedFrom().resolve(Map); - } - assert(Ty.getSizeInBits() && "type with size 0"); - return Ty.getSizeInBits(); -} - -bool DIExpression::isBitPiece() const { - unsigned N = getNumElements(); - return N >=3 && getElement(N-3) == dwarf::DW_OP_bit_piece; -} - -uint64_t DIExpression::getBitPieceOffset() const { - assert(isBitPiece() && "not a piece"); - return getElement(getNumElements()-2); -} - -uint64_t DIExpression::getBitPieceSize() const { - assert(isBitPiece() && "not a piece"); - return getElement(getNumElements()-1); -} - -DIExpression::iterator DIExpression::Operand::getNext() const { - iterator it(I); - return ++it; -} - -//===----------------------------------------------------------------------===// -// Simple Descriptor Constructors and other Methods -//===----------------------------------------------------------------------===// - -void DIDescriptor::replaceAllUsesWith(LLVMContext &, DIDescriptor D) { - assert(DbgNode && "Trying to replace an unverified type!"); - assert(DbgNode->isTemporary() && "Expected temporary node"); - TempMDNode Temp(get()); - - // Since we use a TrackingVH for the node, its easy for clients to manufacture - // legitimate situations where they want to replaceAllUsesWith() on something - // which, due to uniquing, has merged with the source. We shield clients from - // this detail by allowing a value to be replaced with replaceAllUsesWith() - // itself. - if (Temp.get() == D.get()) { - DbgNode = MDNode::replaceWithUniqued(std::move(Temp)); - return; - } - - Temp->replaceAllUsesWith(D.get()); - DbgNode = D.get(); -} - -void DIDescriptor::replaceAllUsesWith(MDNode *D) { - assert(DbgNode && "Trying to replace an unverified type!"); - assert(DbgNode != D && "This replacement should always happen"); - assert(DbgNode->isTemporary() && "Expected temporary node"); - TempMDNode Node(get()); - Node->replaceAllUsesWith(D); -} - -bool DICompileUnit::Verify() const { - if (!isCompileUnit()) - return false; - - // Don't bother verifying the compilation directory or producer string - // as those could be empty. - return !getFilename().empty(); -} - -bool DIObjCProperty::Verify() const { return isObjCProperty(); } - -/// \brief Check if a value can be a reference to a type. -static bool isTypeRef(const Metadata *MD) { - if (!MD) - return true; - if (auto *S = dyn_cast<MDString>(MD)) - return !S->getString().empty(); - return isa<MDType>(MD); -} - -/// \brief Check if a value can be a ScopeRef. -static bool isScopeRef(const Metadata *MD) { - if (!MD) - return true; - if (auto *S = dyn_cast<MDString>(MD)) - return !S->getString().empty(); - return isa<MDScope>(MD); -} - -#ifndef NDEBUG -/// \brief Check if a value can be a DescriptorRef. -static bool isDescriptorRef(const Metadata *MD) { - if (!MD) - return true; - if (auto *S = dyn_cast<MDString>(MD)) - return !S->getString().empty(); - return isa<MDNode>(MD); -} -#endif - -bool DIType::Verify() const { - auto *N = dyn_cast_or_null<MDType>(DbgNode); - if (!N) - return false; - if (!isScopeRef(N->getScope())) - return false; - - // DIType is abstract, it should be a BasicType, a DerivedType or - // a CompositeType. - if (isBasicType()) - return DIBasicType(DbgNode).Verify(); - - // FIXME: Sink this into the various subclass verifies. - if (getFilename().empty()) { - // Check whether the filename is allowed to be empty. 
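// The removed DIExpression bit-piece queries also survive: the
// DebugInfoMetadata.cpp hunk below re-adds them on MDExpression. Usage
// sketch, assuming an MDExpression *Expr:
//
//   if (Expr->isBitPiece()) {
//     uint64_t Off = Expr->getBitPieceOffset();
//     uint64_t Size = Expr->getBitPieceSize();
//   }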
- uint16_t Tag = getTag(); - if (Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_pointer_type && - Tag != dwarf::DW_TAG_ptr_to_member_type && - Tag != dwarf::DW_TAG_reference_type && - Tag != dwarf::DW_TAG_rvalue_reference_type && - Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type && - Tag != dwarf::DW_TAG_enumeration_type && - Tag != dwarf::DW_TAG_subroutine_type && - Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend && - Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_member && - Tag != dwarf::DW_TAG_typedef) - return false; - } - - if (isCompositeType()) - return DICompositeType(DbgNode).Verify(); - if (isDerivedType()) - return DIDerivedType(DbgNode).Verify(); - return false; -} - -bool DIBasicType::Verify() const { - return dyn_cast_or_null<MDBasicType>(DbgNode); -} - -bool DIDerivedType::Verify() const { - auto *N = dyn_cast_or_null<MDDerivedTypeBase>(DbgNode); - if (!N) - return false; - if (getTag() == dwarf::DW_TAG_ptr_to_member_type) { - auto *D = dyn_cast<MDDerivedType>(N); - if (!D) - return false; - if (!isTypeRef(D->getExtraData())) - return false; - } - return isTypeRef(N->getBaseType()); -} - -bool DICompositeType::Verify() const { - auto *N = dyn_cast_or_null<MDCompositeTypeBase>(DbgNode); - return N && isTypeRef(N->getBaseType()) && isTypeRef(N->getVTableHolder()) && - !(isLValueReference() && isRValueReference()); -} - -bool DISubprogram::Verify() const { - auto *N = dyn_cast_or_null<MDSubprogram>(DbgNode); - if (!N) - return false; - - if (!isScopeRef(N->getScope())) - return false; - - if (auto *Op = N->getType()) - if (!isa<MDNode>(Op)) - return false; - - if (!isTypeRef(getContainingType())) - return false; - - if (isLValueReference() && isRValueReference()) - return false; - - // If a DISubprogram has an llvm::Function*, then scope chains from all - // instructions within the function should lead to this DISubprogram. - if (auto *F = getFunction()) { - for (auto &BB : *F) { - for (auto &I : BB) { - DebugLoc DL = I.getDebugLoc(); - if (DL.isUnknown()) - continue; - - MDNode *Scope = nullptr; - MDNode *IA = nullptr; - // walk the inlined-at scopes - while ((IA = DL.getInlinedAt())) - DL = DebugLoc::getFromDILocation(IA); - DL.getScopeAndInlinedAt(Scope, IA); - if (!Scope) - return false; - assert(!IA); - while (!DIDescriptor(Scope).isSubprogram()) { - DILexicalBlockFile D(Scope); - Scope = D.isLexicalBlockFile() - ? 
D.getScope() - : DebugLoc::getFromDILexicalBlock(Scope).getScope(); - if (!Scope) - return false; - } - if (!DISubprogram(Scope).describes(F)) - return false; - } - } - } - - return true; -} - -bool DIGlobalVariable::Verify() const { - auto *N = dyn_cast_or_null<MDGlobalVariable>(DbgNode); - - if (!N) - return false; - - if (N->getDisplayName().empty()) - return false; - - if (auto *Op = N->getScope()) - if (!isa<MDNode>(Op)) - return false; - - if (auto *Op = N->getStaticDataMemberDeclaration()) - if (!isa<MDNode>(Op)) - return false; - - return isTypeRef(N->getType()); -} - -bool DIVariable::Verify() const { - auto *N = dyn_cast_or_null<MDLocalVariable>(DbgNode); - - if (!N) - return false; - - if (auto *Op = N->getScope()) - if (!isa<MDNode>(Op)) - return false; - - return isTypeRef(N->getType()); -} - -bool DILocation::Verify() const { - return dyn_cast_or_null<MDLocation>(DbgNode); -} -bool DINameSpace::Verify() const { - return dyn_cast_or_null<MDNamespace>(DbgNode); -} -bool DIFile::Verify() const { return dyn_cast_or_null<MDFile>(DbgNode); } -bool DIEnumerator::Verify() const { - return dyn_cast_or_null<MDEnumerator>(DbgNode); -} -bool DISubrange::Verify() const { - return dyn_cast_or_null<MDSubrange>(DbgNode); -} -bool DILexicalBlock::Verify() const { - return dyn_cast_or_null<MDLexicalBlock>(DbgNode); -} -bool DILexicalBlockFile::Verify() const { - return dyn_cast_or_null<MDLexicalBlockFile>(DbgNode); -} -bool DITemplateTypeParameter::Verify() const { - return dyn_cast_or_null<MDTemplateTypeParameter>(DbgNode); -} -bool DITemplateValueParameter::Verify() const { - return dyn_cast_or_null<MDTemplateValueParameter>(DbgNode); -} -bool DIImportedEntity::Verify() const { - return dyn_cast_or_null<MDImportedEntity>(DbgNode); -} - -void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) { - TypedTrackingMDRef<MDCompositeTypeBase> N(get()); - if (Elements) - N->replaceElements(cast<MDTuple>(Elements)); - if (TParams) - N->replaceTemplateParams(cast<MDTuple>(TParams)); - DbgNode = N; -} - -DIScopeRef DIScope::getRef() const { - if (!isCompositeType()) - return DIScopeRef(*this); - DICompositeType DTy(DbgNode); - if (!DTy.getIdentifier()) - return DIScopeRef(*this); - return DIScopeRef(DTy.getIdentifier()); -} - -void DICompositeType::setContainingType(DICompositeType ContainingType) { - TypedTrackingMDRef<MDCompositeTypeBase> N(get()); - N->replaceVTableHolder(ContainingType.getRef()); - DbgNode = N; -} - -bool DIVariable::isInlinedFnArgument(const Function *CurFn) { - assert(CurFn && "Invalid function"); - if (!getContext().isSubprogram()) - return false; - // This variable is not inlined function argument if its scope - // does not describe current function. 
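// With the wrappers gone, these per-class Verify() predicates reduce to
// ordinary RTTI on the node hierarchy; e.g., assuming an MDNode *N and a
// hypothetical consumer processVariable:
//
//   if (auto *Var = dyn_cast_or_null<MDLocalVariable>(N))
//     processVariable(Var);  // replaces DIVariable(N).Verify()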
- return !DISubprogram(getContext()).describes(CurFn); -} - -Function *DISubprogram::getFunction() const { - if (auto *N = get()) - if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getFunction())) - return dyn_cast<Function>(C->getValue()); - return nullptr; -} - -bool DISubprogram::describes(const Function *F) { - assert(F && "Invalid function"); - if (F == getFunction()) - return true; - StringRef Name = getLinkageName(); - if (Name.empty()) - Name = getName(); - if (F->getName() == Name) - return true; - return false; -} - -GlobalVariable *DIGlobalVariable::getGlobal() const { - return dyn_cast_or_null<GlobalVariable>(getConstant()); -} - -DIScopeRef DIScope::getContext() const { - - if (isType()) - return DIType(DbgNode).getContext(); - - if (isSubprogram()) - return DIScopeRef(DISubprogram(DbgNode).getContext()); - - if (isLexicalBlock()) - return DIScopeRef(DILexicalBlock(DbgNode).getContext()); - - if (isLexicalBlockFile()) - return DIScopeRef(DILexicalBlockFile(DbgNode).getContext()); - - if (isNameSpace()) - return DIScopeRef(DINameSpace(DbgNode).getContext()); - - assert((isFile() || isCompileUnit()) && "Unhandled type of scope."); - return DIScopeRef(nullptr); -} - -StringRef DIScope::getName() const { - if (isType()) - return DIType(DbgNode).getName(); - if (isSubprogram()) - return DISubprogram(DbgNode).getName(); - if (isNameSpace()) - return DINameSpace(DbgNode).getName(); - assert((isLexicalBlock() || isLexicalBlockFile() || isFile() || - isCompileUnit()) && - "Unhandled type of scope."); - return StringRef(); -} - -StringRef DIScope::getFilename() const { - if (auto *N = get()) - return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 0); - return ""; -} - -StringRef DIScope::getDirectory() const { - if (auto *N = get()) - return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 1); - return ""; -} - -void DICompileUnit::replaceSubprograms(DIArray Subprograms) { - assert(Verify() && "Expected compile unit"); - get()->replaceSubprograms(cast_or_null<MDTuple>(Subprograms.get())); -} - -void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) { - assert(Verify() && "Expected compile unit"); - get()->replaceGlobalVariables(cast_or_null<MDTuple>(GlobalVariables.get())); -} - -DILocation DILocation::copyWithNewScope(LLVMContext &Ctx, - DILexicalBlockFile NewScope) { - assert(Verify()); - assert(NewScope && "Expected valid scope"); - - const auto *Old = cast<MDLocation>(DbgNode); - return DILocation(MDLocation::get(Ctx, Old->getLine(), Old->getColumn(), - NewScope, Old->getInlinedAt())); -} - -unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) { - std::pair<const char *, unsigned> Key(getFilename().data(), getLineNumber()); - return ++Ctx.pImpl->DiscriminatorTable[Key]; -} - -DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope, - LLVMContext &VMContext) { - assert(DIVariable(DV).Verify() && "Expected a DIVariable"); - return cast<MDLocalVariable>(DV) - ->withInline(cast_or_null<MDLocation>(InlinedScope)); -} - -DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) { - assert(DIVariable(DV).Verify() && "Expected a DIVariable"); - return cast<MDLocalVariable>(DV)->withoutInline(); -} - DISubprogram llvm::getDISubprogram(const MDNode *Scope) { - DIDescriptor D(Scope); - if (D.isSubprogram()) - return DISubprogram(Scope); - - if (D.isLexicalBlockFile()) - return getDISubprogram(DILexicalBlockFile(Scope).getContext()); - - if (D.isLexicalBlock()) - return 
getDISubprogram(DILexicalBlock(Scope).getContext()); - - return DISubprogram(); + if (auto *LocalScope = dyn_cast_or_null<MDLocalScope>(Scope)) + return LocalScope->getSubprogram(); + return nullptr; } DISubprogram llvm::getDISubprogram(const Function *F) { // We look for the first instr that has a debug annotation leading back to F. for (auto &BB : *F) { auto Inst = std::find_if(BB.begin(), BB.end(), [](const Instruction &Inst) { - return !Inst.getDebugLoc().isUnknown(); + return Inst.getDebugLoc(); }); if (Inst == BB.end()) continue; DebugLoc DLoc = Inst->getDebugLoc(); - const MDNode *Scope = DLoc.getScopeNode(); + const MDNode *Scope = DLoc.getInlinedAtScope(); DISubprogram Subprogram = getDISubprogram(Scope); - return Subprogram.describes(F) ? Subprogram : DISubprogram(); + return Subprogram->describes(F) ? Subprogram : DISubprogram(); } return DISubprogram(); } DICompositeType llvm::getDICompositeType(DIType T) { - if (T.isCompositeType()) - return DICompositeType(T); + if (auto *C = dyn_cast_or_null<MDCompositeTypeBase>(T)) + return C; - if (T.isDerivedType()) { + if (auto *D = dyn_cast_or_null<MDDerivedTypeBase>(T)) { // This function is currently used by dragonegg and dragonegg does // not generate identifier for types, so using an empty map to resolve // DerivedFrom should be fine. DITypeIdentifierMap EmptyMap; - return getDICompositeType( - DIDerivedType(T).getTypeDerivedFrom().resolve(EmptyMap)); + return getDICompositeType(D->getBaseType().resolve(EmptyMap)); } - return DICompositeType(); + return nullptr; } DITypeIdentifierMap llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) { DITypeIdentifierMap Map; for (unsigned CUi = 0, CUe = CU_Nodes->getNumOperands(); CUi != CUe; ++CUi) { - DICompileUnit CU(CU_Nodes->getOperand(CUi)); - DIArray Retain = CU.getRetainedTypes(); - for (unsigned Ti = 0, Te = Retain.getNumElements(); Ti != Te; ++Ti) { - if (!Retain.getElement(Ti).isCompositeType()) + auto *CU = cast<MDCompileUnit>(CU_Nodes->getOperand(CUi)); + DIArray Retain = CU->getRetainedTypes(); + for (unsigned Ti = 0, Te = Retain.size(); Ti != Te; ++Ti) { + if (!isa<MDCompositeType>(Retain[Ti])) continue; - DICompositeType Ty(Retain.getElement(Ti)); - if (MDString *TypeId = Ty.getIdentifier()) { + auto *Ty = cast<MDCompositeType>(Retain[Ti]); + if (MDString *TypeId = Ty->getRawIdentifier()) { // Definition has priority over declaration. // Try to insert (TypeId, Ty) to Map. std::pair<DITypeIdentifierMap::iterator, bool> P = Map.insert(std::make_pair(TypeId, Ty)); // If TypeId already exists in Map and this is a definition, replace // whatever we had (declaration or definition) with the definition. 
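// getDISubprogram now leans on MDLocalScope::getSubprogram() (added in the
// DebugInfoMetadata.cpp hunk below), which walks lexical blocks up to the
// enclosing subprogram. Equivalent caller sketch, Scope hypothetical:
//
//   if (auto *LS = dyn_cast_or_null<MDLocalScope>(Scope))
//     MDSubprogram *SP = LS->getSubprogram();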
- if (!P.second && !Ty.isForwardDecl()) + if (!P.second && !Ty->isForwardDecl()) P.first->second = Ty; } } @@ -673,37 +123,28 @@ void DebugInfoFinder::processModule(const Module &M) { InitializeTypeMap(M); if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); addCompileUnit(CU); - DIArray GVs = CU.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); + for (DIGlobalVariable DIG : CU->getGlobalVariables()) { if (addGlobalVariable(DIG)) { - processScope(DIG.getContext()); - processType(DIG.getType().resolve(TypeIdentifierMap)); + processScope(DIG->getScope()); + processType(DIG->getType().resolve(TypeIdentifierMap)); } } - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - processSubprogram(DISubprogram(SPs.getElement(i))); - DIArray EnumTypes = CU.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - processType(DIType(EnumTypes.getElement(i))); - DIArray RetainedTypes = CU.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - processType(DIType(RetainedTypes.getElement(i))); - DIArray Imports = CU.getImportedEntities(); - for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) { - DIImportedEntity Import = DIImportedEntity(Imports.getElement(i)); - if (!Import) - continue; - DIDescriptor Entity = Import.getEntity().resolve(TypeIdentifierMap); - if (Entity.isType()) - processType(DIType(Entity)); - else if (Entity.isSubprogram()) - processSubprogram(DISubprogram(Entity)); - else if (Entity.isNameSpace()) - processScope(DINameSpace(Entity).getContext()); + for (auto *SP : CU->getSubprograms()) + processSubprogram(SP); + for (auto *ET : CU->getEnumTypes()) + processType(ET); + for (auto *RT : CU->getRetainedTypes()) + processType(RT); + for (DIImportedEntity Import : CU->getImportedEntities()) { + auto *Entity = Import->getEntity().resolve(TypeIdentifierMap); + if (auto *T = dyn_cast<MDType>(Entity)) + processType(T); + else if (auto *SP = dyn_cast<MDSubprogram>(Entity)) + processSubprogram(SP); + else if (auto *NS = dyn_cast<MDNamespace>(Entity)) + processScope(NS->getScope()); } } } @@ -713,79 +154,66 @@ void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) { if (!Loc) return; InitializeTypeMap(M); - processScope(Loc.getScope()); - processLocation(M, Loc.getOrigLocation()); + processScope(Loc->getScope()); + processLocation(M, Loc->getInlinedAt()); } void DebugInfoFinder::processType(DIType DT) { if (!addType(DT)) return; - processScope(DT.getContext().resolve(TypeIdentifierMap)); - if (DT.isCompositeType()) { - DICompositeType DCT(DT); - processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap)); - if (DT.isSubroutineType()) { - DITypeArray DTA = DISubroutineType(DT).getTypeArray(); - for (unsigned i = 0, e = DTA.getNumElements(); i != e; ++i) - processType(DTA.getElement(i).resolve(TypeIdentifierMap)); + processScope(DT->getScope().resolve(TypeIdentifierMap)); + if (auto *DCT = dyn_cast<MDCompositeTypeBase>(DT)) { + processType(DCT->getBaseType().resolve(TypeIdentifierMap)); + if (auto *ST = dyn_cast<MDSubroutineType>(DCT)) { + for (MDTypeRef Ref : ST->getTypeArray()) + processType(Ref.resolve(TypeIdentifierMap)); return; } - DIArray DA = DCT.getElements(); - for (unsigned i = 0, 
e = DA.getNumElements(); i != e; ++i) { - DIDescriptor D = DA.getElement(i); - if (D.isType()) - processType(DIType(D)); - else if (D.isSubprogram()) - processSubprogram(DISubprogram(D)); + for (Metadata *D : DCT->getElements()) { + if (auto *T = dyn_cast<MDType>(D)) + processType(T); + else if (DISubprogram SP = dyn_cast<MDSubprogram>(D)) + processSubprogram(SP); } - } else if (DT.isDerivedType()) { - DIDerivedType DDT(DT); - processType(DDT.getTypeDerivedFrom().resolve(TypeIdentifierMap)); + } else if (auto *DDT = dyn_cast<MDDerivedTypeBase>(DT)) { + processType(DDT->getBaseType().resolve(TypeIdentifierMap)); } } void DebugInfoFinder::processScope(DIScope Scope) { - if (Scope.isType()) { - DIType Ty(Scope); + if (!Scope) + return; + if (DIType Ty = dyn_cast<MDType>(Scope)) { processType(Ty); return; } - if (Scope.isCompileUnit()) { - addCompileUnit(DICompileUnit(Scope)); + if (DICompileUnit CU = dyn_cast<MDCompileUnit>(Scope)) { + addCompileUnit(CU); return; } - if (Scope.isSubprogram()) { - processSubprogram(DISubprogram(Scope)); + if (DISubprogram SP = dyn_cast<MDSubprogram>(Scope)) { + processSubprogram(SP); return; } if (!addScope(Scope)) return; - if (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope); - processScope(LB.getContext()); - } else if (Scope.isLexicalBlockFile()) { - DILexicalBlockFile LBF = DILexicalBlockFile(Scope); - processScope(LBF.getScope()); - } else if (Scope.isNameSpace()) { - DINameSpace NS(Scope); - processScope(NS.getContext()); + if (auto *LB = dyn_cast<MDLexicalBlockBase>(Scope)) { + processScope(LB->getScope()); + } else if (auto *NS = dyn_cast<MDNamespace>(Scope)) { + processScope(NS->getScope()); } } void DebugInfoFinder::processSubprogram(DISubprogram SP) { if (!addSubprogram(SP)) return; - processScope(SP.getContext().resolve(TypeIdentifierMap)); - processType(SP.getType()); - DIArray TParams = SP.getTemplateParams(); - for (unsigned I = 0, E = TParams.getNumElements(); I != E; ++I) { - DIDescriptor Element = TParams.getElement(I); - if (Element.isTemplateTypeParameter()) { - DITemplateTypeParameter TType(Element); - processType(TType.getType().resolve(TypeIdentifierMap)); - } else if (Element.isTemplateValueParameter()) { - DITemplateValueParameter TVal(Element); - processType(TVal.getType().resolve(TypeIdentifierMap)); + processScope(SP->getScope().resolve(TypeIdentifierMap)); + processType(SP->getType()); + for (auto *Element : SP->getTemplateParams()) { + if (auto *TType = dyn_cast<MDTemplateTypeParameter>(Element)) { + processType(TType->getType().resolve(TypeIdentifierMap)); + } else if (auto *TVal = dyn_cast<MDTemplateValueParameter>(Element)) { + processType(TVal->getType().resolve(TypeIdentifierMap)); } } } @@ -797,14 +225,14 @@ void DebugInfoFinder::processDeclare(const Module &M, return; InitializeTypeMap(M); - DIDescriptor DV(N); - if (!DV.isVariable()) + DIVariable DV = dyn_cast<MDLocalVariable>(N); + if (!DV) return; if (!NodesSeen.insert(DV).second) return; - processScope(DIVariable(N).getContext()); - processType(DIVariable(N).getType().resolve(TypeIdentifierMap)); + processScope(DV->getScope()); + processType(DV->getType().resolve(TypeIdentifierMap)); } void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) { @@ -813,14 +241,14 @@ void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) { return; InitializeTypeMap(M); - DIDescriptor DV(N); - if (!DV.isVariable()) + DIVariable DV = dyn_cast<MDLocalVariable>(N); + if (!DV) return; if (!NodesSeen.insert(DV).second) return; - 
processScope(DIVariable(N).getContext()); - processType(DIVariable(N).getType().resolve(TypeIdentifierMap)); + processScope(DV->getScope()); + processType(DV->getType().resolve(TypeIdentifierMap)); } bool DebugInfoFinder::addType(DIType DT) { @@ -879,76 +307,17 @@ bool DebugInfoFinder::addScope(DIScope Scope) { return true; } -//===----------------------------------------------------------------------===// -// DIDescriptor: dump routines for all descriptors. -//===----------------------------------------------------------------------===// - -void DIDescriptor::dump() const { - print(dbgs()); - dbgs() << '\n'; -} - -void DIDescriptor::print(raw_ostream &OS) const { - if (!get()) - return; - get()->print(OS); -} - -static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, - const LLVMContext &Ctx) { - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - assert(Scope.isScope() && "Scope of a DebugLoc should be a DIScope."); - // Omit the directory, because it's likely to be long and uninteresting. - CommentOS << Scope.getFilename(); - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << " @[ "; - printDebugLoc(InlinedAtDL, CommentOS, Ctx); - CommentOS << " ]"; - } - } -} - -void DIVariable::printExtendedName(raw_ostream &OS) const { - const LLVMContext &Ctx = DbgNode->getContext(); - StringRef Res = getName(); - if (!Res.empty()) - OS << Res << "," << getLineNumber(); - if (MDNode *InlinedAt = getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown()) { - OS << " @["; - printDebugLoc(InlinedAtDL, OS, Ctx); - OS << "]"; +bool llvm::stripDebugInfo(Function &F) { + bool Changed = false; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (I.getDebugLoc()) { + Changed = true; + I.setDebugLoc(DebugLoc()); + } } } -} - -template <> DIRef<DIDescriptor>::DIRef(const Metadata *V) : Val(V) { - assert(isDescriptorRef(V) && - "DIDescriptorRef should be a MDString or MDNode"); -} -template <> DIRef<DIScope>::DIRef(const Metadata *V) : Val(V) { - assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode"); -} -template <> DIRef<DIType>::DIRef(const Metadata *V) : Val(V) { - assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode"); -} - -template <> -DIDescriptorRef DIDescriptor::getFieldAs<DIDescriptorRef>(unsigned Elt) const { - return DIDescriptorRef(cast_or_null<Metadata>(getField(DbgNode, Elt))); -} -template <> -DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const { - return DIScopeRef(cast_or_null<Metadata>(getField(DbgNode, Elt))); -} -template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const { - return DITypeRef(cast_or_null<Metadata>(getField(DbgNode, Elt))); + return Changed; } bool llvm::StripDebugInfo(Module &M) { @@ -984,16 +353,11 @@ bool llvm::StripDebugInfo(Module &M) { } } - for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) - for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; - ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) { - if (!BI->getDebugLoc().isUnknown()) { - Changed = true; - BI->setDebugLoc(DebugLoc()); - } - } + for (Function &F : M) + Changed |= stripDebugInfo(F); + + if (GVMaterializer *Materializer = M.getMaterializer()) + Materializer->setStripDebugInfo(); return Changed; } @@ 
-1014,11 +378,9 @@ llvm::makeSubprogramMap(const Module &M) { return R; for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit CUNode(N); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (Function *F = SP.getFunction()) + DICompileUnit CUNode = cast<MDCompileUnit>(N); + for (DISubprogram SP : CUNode->getSubprograms()) { + if (Function *F = SP->getFunction()) R.insert(std::make_pair(F, SP)); } } diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 89ec1bc..f6f2ff2 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -44,6 +45,7 @@ MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line, // Fixup column. adjustColumn(Column); + assert(Scope && "Expected scope"); if (Storage == Uniqued) { if (auto *N = getUniqued(Context.pImpl->MDLocations, @@ -64,6 +66,96 @@ MDLocation *MDLocation::getImpl(LLVMContext &Context, unsigned Line, Storage, Context.pImpl->MDLocations); } +unsigned MDLocation::computeNewDiscriminator() const { + // FIXME: This seems completely wrong. + // + // 1. If two modules are generated in the same context, then the second + // Module will get different discriminators than it would have if it were + // generated in its own context. + // 2. If this function is called after round-tripping to bitcode instead of + // before, it will give a different (and potentially incorrect!) return. + // + // The discriminator should instead be calculated from local information + // where it's actually needed. This logic should be moved to + // AddDiscriminators::runOnFunction(), where it doesn't pollute the + // LLVMContext. + std::pair<const char *, unsigned> Key(getFilename().data(), getLine()); + return ++getContext().pImpl->DiscriminatorTable[Key]; +} + +unsigned DebugNode::getFlag(StringRef Flag) { + return StringSwitch<unsigned>(Flag) +#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME) +#include "llvm/IR/DebugInfoFlags.def" + .Default(0); +} + +const char *DebugNode::getFlagString(unsigned Flag) { + switch (Flag) { + default: + return ""; +#define HANDLE_DI_FLAG(ID, NAME) \ + case Flag##NAME: \ + return "DIFlag" #NAME; +#include "llvm/IR/DebugInfoFlags.def" + } +} + +unsigned DebugNode::splitFlags(unsigned Flags, + SmallVectorImpl<unsigned> &SplitFlags) { + // Accessibility flags need to be specially handled, since they're packed + // together. 
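// StripDebugInfo(Module&) is now a thin loop over the new per-function
// helper, plus a hint to any attached GVMaterializer so lazily-loaded
// bodies are stripped too. The core of it, as added above:
//
//   bool Changed = false;
//   for (Function &F : M)
//     Changed |= stripDebugInfo(F);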
+ if (unsigned A = Flags & FlagAccessibility) { + if (A == FlagPrivate) + SplitFlags.push_back(FlagPrivate); + else if (A == FlagProtected) + SplitFlags.push_back(FlagProtected); + else + SplitFlags.push_back(FlagPublic); + Flags &= ~A; + } + +#define HANDLE_DI_FLAG(ID, NAME) \ + if (unsigned Bit = Flags & ID) { \ + SplitFlags.push_back(Bit); \ + Flags &= ~Bit; \ + } +#include "llvm/IR/DebugInfoFlags.def" + + return Flags; +} + +MDScopeRef MDScope::getScope() const { + if (auto *T = dyn_cast<MDType>(this)) + return T->getScope(); + + if (auto *SP = dyn_cast<MDSubprogram>(this)) + return SP->getScope(); + + if (auto *LB = dyn_cast<MDLexicalBlockBase>(this)) + return MDScopeRef(LB->getScope()); + + if (auto *NS = dyn_cast<MDNamespace>(this)) + return MDScopeRef(NS->getScope()); + + assert((isa<MDFile>(this) || isa<MDCompileUnit>(this)) && + "Unhandled type of scope."); + return nullptr; +} + +StringRef MDScope::getName() const { + if (auto *T = dyn_cast<MDType>(this)) + return T->getName(); + if (auto *SP = dyn_cast<MDSubprogram>(this)) + return SP->getName(); + if (auto *NS = dyn_cast<MDNamespace>(this)) + return NS->getName(); + assert((isa<MDLexicalBlockBase>(this) || isa<MDFile>(this) || + isa<MDCompileUnit>(this)) && + "Unhandled type of scope."); + return ""; +} + static StringRef getString(const MDString *S) { if (S) return S->getString(); @@ -237,6 +329,12 @@ MDCompileUnit *MDCompileUnit::getImpl( (SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind), Ops); } +MDSubprogram *MDLocalScope::getSubprogram() const { + if (auto *Block = dyn_cast<MDLexicalBlockBase>(this)) + return Block->getScope()->getSubprogram(); + return const_cast<MDSubprogram *>(cast<MDSubprogram>(this)); +} + MDSubprogram *MDSubprogram::getImpl( LLVMContext &Context, Metadata *Scope, MDString *Name, MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, @@ -262,6 +360,21 @@ MDSubprogram *MDSubprogram::getImpl( Ops); } +Function *MDSubprogram::getFunction() const { + // FIXME: Should this be looking through bitcasts? + return dyn_cast_or_null<Function>(getFunctionConstant()); +} + +bool MDSubprogram::describes(const Function *F) const { + assert(F && "Invalid function"); + if (F == getFunction()) + return true; + StringRef Name = getLinkageName(); + if (Name.empty()) + Name = getName(); + return F->getName() == Name; +} + void MDSubprogram::replaceFunction(Function *F) { replaceFunction(F ? 
ConstantAsMetadata::get(F) : static_cast<ConstantAsMetadata *>(nullptr)); @@ -271,6 +384,7 @@ MDLexicalBlock *MDLexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *File, unsigned Line, unsigned Column, StorageType Storage, bool ShouldCreate) { + assert(Scope && "Expected scope"); DEFINE_GETIMPL_LOOKUP(MDLexicalBlock, (Scope, File, Line, Column)); Metadata *Ops[] = {File, Scope}; DEFINE_GETIMPL_STORE(MDLexicalBlock, (Line, Column), Ops); @@ -281,6 +395,7 @@ MDLexicalBlockFile *MDLexicalBlockFile::getImpl(LLVMContext &Context, unsigned Discriminator, StorageType Storage, bool ShouldCreate) { + assert(Scope && "Expected scope"); DEFINE_GETIMPL_LOOKUP(MDLexicalBlockFile, (Scope, File, Discriminator)); Metadata *Ops[] = {File, Scope}; DEFINE_GETIMPL_STORE(MDLexicalBlockFile, (Discriminator), Ops); @@ -335,20 +450,23 @@ MDGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Ops); } -MDLocalVariable *MDLocalVariable::getImpl( - LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags, - Metadata *InlinedAt, StorageType Storage, bool ShouldCreate) { +MDLocalVariable *MDLocalVariable::getImpl(LLVMContext &Context, unsigned Tag, + Metadata *Scope, MDString *Name, + Metadata *File, unsigned Line, + Metadata *Type, unsigned Arg, + unsigned Flags, StorageType Storage, + bool ShouldCreate) { // Truncate Arg to 8 bits. // // FIXME: This is gross (and should be changed to an assert or removed), but // it matches historical behaviour for now. Arg &= (1u << 8) - 1; + assert(Scope && "Expected scope"); assert(isCanonical(Name) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(MDLocalVariable, (Tag, Scope, getString(Name), File, - Line, Type, Arg, Flags, InlinedAt)); - Metadata *Ops[] = {Scope, Name, File, Type, InlinedAt}; + Line, Type, Arg, Flags)); + Metadata *Ops[] = {Scope, Name, File, Type}; DEFINE_GETIMPL_STORE(MDLocalVariable, (Tag, Line, Arg, Flags), Ops); } @@ -391,6 +509,24 @@ bool MDExpression::isValid() const { return true; } +bool MDExpression::isBitPiece() const { + assert(isValid() && "Expected valid expression"); + if (unsigned N = getNumElements()) + if (N >= 3) + return getElement(N - 3) == dwarf::DW_OP_bit_piece; + return false; +} + +uint64_t MDExpression::getBitPieceOffset() const { + assert(isBitPiece() && "Expected bit piece"); + return getElement(getNumElements() - 2); +} + +uint64_t MDExpression::getBitPieceSize() const { + assert(isBitPiece() && "Expected bit piece"); + return getElement(getNumElements() - 1); +} + MDObjCProperty *MDObjCProperty::getImpl( LLVMContext &Context, MDString *Name, Metadata *File, unsigned Line, MDString *GetterName, MDString *SetterName, unsigned Attributes, diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index e1bf795..4cf7e9e 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -16,101 +16,87 @@ using namespace llvm; //===----------------------------------------------------------------------===// // DebugLoc Implementation //===----------------------------------------------------------------------===// +DebugLoc::DebugLoc(const MDLocation *L) : Loc(const_cast<MDLocation *>(L)) {} +DebugLoc::DebugLoc(const MDNode *L) : Loc(const_cast<MDNode *>(L)) {} -unsigned DebugLoc::getLine() const { return DILocation(Loc).getLineNumber(); } -unsigned DebugLoc::getCol() const { return DILocation(Loc).getColumnNumber(); } +MDLocation *DebugLoc::get() const { + return cast_or_null<MDLocation>(Loc.get()); +} 
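// DebugLoc is now a thin wrapper over MDLocation, so validity is a plain
// boolean test instead of isUnknown(). Typical caller sketch, assuming an
// Instruction &I:
//
//   if (DebugLoc DL = I.getDebugLoc()) {   // was: !DL.isUnknown()
//     unsigned Line = DL.getLine();        // asserts on a null DebugLoc
//     MDLocation *IA = DL.getInlinedAt();
//   }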
-MDNode *DebugLoc::getScope() const { return DILocation(Loc).getScope(); } +unsigned DebugLoc::getLine() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getLine(); +} -MDNode *DebugLoc::getInlinedAt() const { - return DILocation(Loc).getOrigLocation(); +unsigned DebugLoc::getCol() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getColumn(); } -/// Return both the Scope and the InlinedAt values. -void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA) const { - Scope = getScope(); - IA = getInlinedAt(); +MDNode *DebugLoc::getScope() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getScope(); } -MDNode *DebugLoc::getScopeNode() const { - if (MDNode *InlinedAt = getInlinedAt()) - return DebugLoc::getFromDILocation(InlinedAt).getScopeNode(); - return getScope(); +MDLocation *DebugLoc::getInlinedAt() const { + assert(get() && "Expected valid DebugLoc"); + return get()->getInlinedAt(); +} + +MDNode *DebugLoc::getInlinedAtScope() const { + return cast<MDLocation>(Loc)->getInlinedAtScope(); } DebugLoc DebugLoc::getFnDebugLoc() const { - const MDNode *Scope = getScopeNode(); - DISubprogram SP = getDISubprogram(Scope); - if (SP.isSubprogram()) - return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + // FIXME: Add a method on \a MDLocation that does this work. + const MDNode *Scope = getInlinedAtScope(); + if (DISubprogram SP = getDISubprogram(Scope)) + return DebugLoc::get(SP->getScopeLine(), 0, SP); return DebugLoc(); } -DebugLoc DebugLoc::get(unsigned Line, unsigned Col, - MDNode *Scope, MDNode *InlinedAt) { +DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope, + const MDNode *InlinedAt) { // If no scope is available, this is an unknown location. if (!Scope) return DebugLoc(); - return getFromDILocation( - MDLocation::get(Scope->getContext(), Line, Col, Scope, InlinedAt)); -} - -/// getAsMDNode - This method converts the compressed DebugLoc node into a -/// DILocation-compatible MDNode. -MDNode *DebugLoc::getAsMDNode() const { return Loc; } - -/// getFromDILocation - Translate the DILocation quad into a DebugLoc. -DebugLoc DebugLoc::getFromDILocation(MDNode *N) { - DebugLoc Loc; - Loc.Loc.reset(N); - return Loc; -} - -/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc. -DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) { - DILexicalBlock LexBlock(N); - MDNode *Scope = LexBlock.getContext(); - if (!Scope) return DebugLoc(); - return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, - nullptr); + return MDLocation::get(Scope->getContext(), Line, Col, + const_cast<MDNode *>(Scope), + const_cast<MDNode *>(InlinedAt)); } void DebugLoc::dump() const { #ifndef NDEBUG - if (!isUnknown()) { - dbgs() << getLine(); - if (getCol() != 0) - dbgs() << ',' << getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt()); - if (!InlinedAtDL.isUnknown()) { - dbgs() << " @ "; - InlinedAtDL.dump(); - } else - dbgs() << "\n"; - } + if (!Loc) + return; + + dbgs() << getLine(); + if (getCol() != 0) + dbgs() << ',' << getCol(); + if (DebugLoc InlinedAtDL = DebugLoc(getInlinedAt())) { + dbgs() << " @ "; + InlinedAtDL.dump(); + } else + dbgs() << "\n"; #endif } void DebugLoc::print(raw_ostream &OS) const { - if (!isUnknown()) { - // Print source line info. 
- DIScope Scope(getScope()); - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - if (Scope) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << getLine(); - if (getCol() != 0) - OS << ':' << getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt()); - if (!InlinedAtDL.isUnknown()) { - OS << " @[ "; - InlinedAtDL.print(OS); - OS << " ]"; - } + if (!Loc) + return; + + // Print source line info. + auto *Scope = cast<MDScope>(getScope()); + OS << Scope->getFilename(); + OS << ':' << getLine(); + if (getCol() != 0) + OS << ':' << getCol(); + + if (DebugLoc InlinedAtDL = getInlinedAt()) { + OS << " @[ "; + InlinedAtDL.print(OS); + OS << " ]"; } } diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 5608589..91635f6 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -129,16 +129,16 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { } bool DiagnosticInfoOptimizationBase::isLocationAvailable() const { - return !getDebugLoc().isUnknown(); + return getDebugLoc(); } void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename, unsigned *Line, unsigned *Column) const { - DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext())); - *Filename = DIL.getFilename(); - *Line = DIL.getLineNumber(); - *Column = DIL.getColumnNumber(); + MDLocation *L = getDebugLoc(); + *Filename = L->getFilename(); + *Line = L->getLine(); + *Column = L->getColumn(); } const std::string DiagnosticInfoOptimizationBase::getLocationStr() const { @@ -147,7 +147,7 @@ const std::string DiagnosticInfoOptimizationBase::getLocationStr() const { unsigned Column = 0; if (isLocationAvailable()) getLocation(&Filename, &Line, &Column); - return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str(); + return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str(); } void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const { diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 33e1526..d3a0934 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -215,9 +215,7 @@ LLVMContext &Function::getContext() const { return getType()->getContext(); } -FunctionType *Function::getFunctionType() const { - return cast<FunctionType>(getType()->getElementType()); -} +FunctionType *Function::getFunctionType() const { return Ty; } bool Function::isVarArg() const { return getFunctionType()->isVarArg(); @@ -242,7 +240,8 @@ void Function::eraseFromParent() { Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, nullptr, 0, - Linkage, name) { + Linkage, name), + Ty(Ty) { assert(FunctionType::isValidReturnType(getReturnType()) && "invalid return type"); setIsMaterializable(false); @@ -349,6 +348,12 @@ void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); + setAttributes(PAL); +} + // Maintain the GC name for each function in an on-the-side table. 
This saves // allocating an additional word in Function for programs which do not use GC // (i.e., most programs) at the cost of increased overhead for clients which do diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index 7010ceb..6ed5891 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -555,7 +555,7 @@ FileInfo::openCoveragePath(StringRef CoveragePath) { return llvm::make_unique<raw_null_ostream>(); std::error_code EC; - auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str(), EC, + auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath, EC, sys::fs::F_Text); if (EC) { errs() << EC.message() << "\n"; diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 90303b2..06f54c7 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -23,7 +23,8 @@ using namespace llvm; /// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. -Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { +GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str, + const Twine &Name) { Constant *StrConstant = ConstantDataArray::getString(Context, Str); Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 91ccfbb..c1ac336 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -21,11 +21,14 @@ using namespace llvm; PrintModulePass::PrintModulePass() : OS(dbgs()) {} -PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner) - : OS(OS), Banner(Banner) {} +PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner, + bool ShouldPreserveUseListOrder) + : OS(OS), Banner(Banner), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} PreservedAnalyses PrintModulePass::run(Module &M) { - OS << Banner << M; + OS << Banner; + M.print(OS, nullptr, ShouldPreserveUseListOrder); return PreservedAnalyses::all(); } @@ -46,8 +49,9 @@ class PrintModulePassWrapper : public ModulePass { public: static char ID; PrintModulePassWrapper() : ModulePass(ID) {} - PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner) - : ModulePass(ID), P(OS, Banner) {} + PrintModulePassWrapper(raw_ostream &OS, const std::string &Banner, + bool ShouldPreserveUseListOrder) + : ModulePass(ID), P(OS, Banner, ShouldPreserveUseListOrder) {} bool runOnModule(Module &M) override { P.run(M); @@ -112,8 +116,9 @@ INITIALIZE_PASS(PrintBasicBlockPass, "print-bb", "Print BB to stderr", false, false) ModulePass *llvm::createPrintModulePass(llvm::raw_ostream &OS, - const std::string &Banner) { - return new PrintModulePassWrapper(OS, Banner); + const std::string &Banner, + bool ShouldPreserveUseListOrder) { + return new PrintModulePassWrapper(OS, Banner, ShouldPreserveUseListOrder); } FunctionPass *llvm::createPrintFunctionPass(llvm::raw_ostream &OS, diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index b456d9f..aa9e027 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -167,7 +167,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, // Note that operand #n has a matching input. scInfo.MatchingInput = ConstraintsSoFar.size(); } else { - if (ConstraintsSoFar[N].hasMatchingInput()) + if (ConstraintsSoFar[N].hasMatchingInput() && + (size_t)ConstraintsSoFar[N].MatchingInput != + ConstraintsSoFar.size()) return true; // Note that operand #n has a matching input. 
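// The new ShouldPreserveUseListOrder flag threads through to
// Module::print; a hedged usage sketch for the updated factory:
//
//   ModulePass *P =
//       createPrintModulePass(outs(), "", /*ShouldPreserveUseListOrder=*/true);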
ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size(); diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 7d9bd7e..57c143c 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -62,8 +62,8 @@ void Instruction::removeFromParent() { getParent()->getInstList().remove(this); } -void Instruction::eraseFromParent() { - getParent()->getInstList().erase(this); +iplist<Instruction>::iterator Instruction::eraseFromParent() { + return getParent()->getInstList().erase(this); } /// insertBefore - Insert an unlinked instructions into a basic block diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index af2aeb9..85b7521 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -352,6 +352,12 @@ void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); + setAttributes(PAL); +} + bool CallInst::hasFnAttrImpl(Attribute::AttrKind A) const { if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A)) return true; @@ -617,6 +623,12 @@ void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +void InvokeInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { + AttributeSet PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); + setAttributes(PAL); +} + LandingPadInst *InvokeInst::getLandingPadInst() const { return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI()); } @@ -943,12 +955,10 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, InsertAE) { } -LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, +LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, - Instruction *InsertBef) - : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), - Load, Ptr, InsertBef) { + SynchronizationScope SynchScope, Instruction *InsertBef) + : UnaryInstruction(Ty, Load, Ptr, InsertBef) { setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SynchScope); @@ -1258,11 +1268,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) /// pointer type. /// template <typename IndexTy> -static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { - PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType()); - if (!PTy) return nullptr; // Type isn't a pointer type! - Type *Agg = PTy->getElementType(); - +static Type *getIndexedTypeInternal(Type *Agg, ArrayRef<IndexTy> IdxList) { // Handle the special case of the empty set index set, which is always valid. if (IdxList.empty()) return Agg; @@ -1283,17 +1289,17 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { return CurIdx == IdxList.size() ? 
Agg : nullptr; } -Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) { - return getIndexedTypeInternal(Ptr, IdxList); +Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<Value *> IdxList) { + return getIndexedTypeInternal(Ty, IdxList); } -Type *GetElementPtrInst::getIndexedType(Type *Ptr, +Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<Constant *> IdxList) { - return getIndexedTypeInternal(Ptr, IdxList); + return getIndexedTypeInternal(Ty, IdxList); } -Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) { - return getIndexedTypeInternal(Ptr, IdxList); +Type *GetElementPtrInst::getIndexedType(Type *Ty, ArrayRef<uint64_t> IdxList) { + return getIndexedTypeInternal(Ty, IdxList); } /// hasAllZeroIndices - Return true if all of the indices of this GEP are diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index e380665..c096a83 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -275,15 +275,17 @@ template <> struct MDNodeKeyImpl<GenericDebugNode> : MDNodeOpsKey { template <> struct MDNodeKeyImpl<MDSubrange> { int64_t Count; - int64_t Lo; + int64_t LowerBound; - MDNodeKeyImpl(int64_t Count, int64_t Lo) : Count(Count), Lo(Lo) {} - MDNodeKeyImpl(const MDSubrange *N) : Count(N->getCount()), Lo(N->getLo()) {} + MDNodeKeyImpl(int64_t Count, int64_t LowerBound) + : Count(Count), LowerBound(LowerBound) {} + MDNodeKeyImpl(const MDSubrange *N) + : Count(N->getCount()), LowerBound(N->getLowerBound()) {} bool isKeyOf(const MDSubrange *RHS) const { - return Count == RHS->getCount() && Lo == RHS->getLo(); + return Count == RHS->getCount() && LowerBound == RHS->getLowerBound(); } - unsigned getHashValue() const { return hash_combine(Count, Lo); } + unsigned getHashValue() const { return hash_combine(Count, LowerBound); } }; template <> struct MDNodeKeyImpl<MDEnumerator> { @@ -347,20 +349,20 @@ template <> struct MDNodeKeyImpl<MDDerivedType> { BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits), OffsetInBits(OffsetInBits), Flags(Flags), ExtraData(ExtraData) {} MDNodeKeyImpl(const MDDerivedType *N) - : Tag(N->getTag()), Name(N->getName()), File(N->getFile()), - Line(N->getLine()), Scope(N->getScope()), BaseType(N->getBaseType()), - SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()), - OffsetInBits(N->getOffsetInBits()), Flags(N->getFlags()), - ExtraData(N->getExtraData()) {} + : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()), + Line(N->getLine()), Scope(N->getRawScope()), + BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()), + AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()), + Flags(N->getFlags()), ExtraData(N->getRawExtraData()) {} bool isKeyOf(const MDDerivedType *RHS) const { return Tag == RHS->getTag() && Name == RHS->getName() && - File == RHS->getFile() && Line == RHS->getLine() && - Scope == RHS->getScope() && BaseType == RHS->getBaseType() && + File == RHS->getRawFile() && Line == RHS->getLine() && + Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() && SizeInBits == RHS->getSizeInBits() && AlignInBits == RHS->getAlignInBits() && OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() && - ExtraData == RHS->getExtraData(); + ExtraData == RHS->getRawExtraData(); } unsigned getHashValue() const { return hash_combine(Tag, Name, File, Line, Scope, BaseType, SizeInBits, @@ -397,26 +399,26 @@ template <> struct MDNodeKeyImpl<MDCompositeType> { RuntimeLang(RuntimeLang), VTableHolder(VTableHolder), 
TemplateParams(TemplateParams), Identifier(Identifier) {} MDNodeKeyImpl(const MDCompositeType *N) - : Tag(N->getTag()), Name(N->getName()), File(N->getFile()), - Line(N->getLine()), Scope(N->getScope()), BaseType(N->getBaseType()), - SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()), - OffsetInBits(N->getOffsetInBits()), Flags(N->getFlags()), - Elements(N->getElements()), RuntimeLang(N->getRuntimeLang()), - VTableHolder(N->getVTableHolder()), - TemplateParams(N->getTemplateParams()), Identifier(N->getIdentifier()) { - } + : Tag(N->getTag()), Name(N->getName()), File(N->getRawFile()), + Line(N->getLine()), Scope(N->getRawScope()), + BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()), + AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()), + Flags(N->getFlags()), Elements(N->getRawElements()), + RuntimeLang(N->getRuntimeLang()), VTableHolder(N->getRawVTableHolder()), + TemplateParams(N->getRawTemplateParams()), + Identifier(N->getIdentifier()) {} bool isKeyOf(const MDCompositeType *RHS) const { return Tag == RHS->getTag() && Name == RHS->getName() && - File == RHS->getFile() && Line == RHS->getLine() && - Scope == RHS->getScope() && BaseType == RHS->getBaseType() && + File == RHS->getRawFile() && Line == RHS->getLine() && + Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() && SizeInBits == RHS->getSizeInBits() && AlignInBits == RHS->getAlignInBits() && OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() && - Elements == RHS->getElements() && + Elements == RHS->getRawElements() && RuntimeLang == RHS->getRuntimeLang() && - VTableHolder == RHS->getVTableHolder() && - TemplateParams == RHS->getTemplateParams() && + VTableHolder == RHS->getRawVTableHolder() && + TemplateParams == RHS->getRawTemplateParams() && Identifier == RHS->getIdentifier(); } unsigned getHashValue() const { @@ -433,10 +435,10 @@ template <> struct MDNodeKeyImpl<MDSubroutineType> { MDNodeKeyImpl(int64_t Flags, Metadata *TypeArray) : Flags(Flags), TypeArray(TypeArray) {} MDNodeKeyImpl(const MDSubroutineType *N) - : Flags(N->getFlags()), TypeArray(N->getTypeArray()) {} + : Flags(N->getFlags()), TypeArray(N->getRawTypeArray()) {} bool isKeyOf(const MDSubroutineType *RHS) const { - return Flags == RHS->getFlags() && TypeArray == RHS->getTypeArray(); + return Flags == RHS->getFlags() && TypeArray == RHS->getRawTypeArray(); } unsigned getHashValue() const { return hash_combine(Flags, TypeArray); } }; @@ -484,27 +486,28 @@ template <> struct MDNodeKeyImpl<MDCompileUnit> { Subprograms(Subprograms), GlobalVariables(GlobalVariables), ImportedEntities(ImportedEntities) {} MDNodeKeyImpl(const MDCompileUnit *N) - : SourceLanguage(N->getSourceLanguage()), File(N->getFile()), + : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()), Producer(N->getProducer()), IsOptimized(N->isOptimized()), Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()), SplitDebugFilename(N->getSplitDebugFilename()), - EmissionKind(N->getEmissionKind()), EnumTypes(N->getEnumTypes()), - RetainedTypes(N->getRetainedTypes()), Subprograms(N->getSubprograms()), - GlobalVariables(N->getGlobalVariables()), - ImportedEntities(N->getImportedEntities()) {} + EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()), + RetainedTypes(N->getRawRetainedTypes()), + Subprograms(N->getRawSubprograms()), + GlobalVariables(N->getRawGlobalVariables()), + ImportedEntities(N->getRawImportedEntities()) {} bool isKeyOf(const MDCompileUnit *RHS) const { return SourceLanguage == 
RHS->getSourceLanguage() && - File == RHS->getFile() && Producer == RHS->getProducer() && + File == RHS->getRawFile() && Producer == RHS->getProducer() && IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() && RuntimeVersion == RHS->getRuntimeVersion() && SplitDebugFilename == RHS->getSplitDebugFilename() && EmissionKind == RHS->getEmissionKind() && - EnumTypes == RHS->getEnumTypes() && - RetainedTypes == RHS->getRetainedTypes() && - Subprograms == RHS->getSubprograms() && - GlobalVariables == RHS->getGlobalVariables() && - ImportedEntities == RHS->getImportedEntities(); + EnumTypes == RHS->getRawEnumTypes() && + RetainedTypes == RHS->getRawRetainedTypes() && + Subprograms == RHS->getRawSubprograms() && + GlobalVariables == RHS->getRawGlobalVariables() && + ImportedEntities == RHS->getRawImportedEntities(); } unsigned getHashValue() const { return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags, @@ -549,31 +552,32 @@ template <> struct MDNodeKeyImpl<MDSubprogram> { Function(Function), TemplateParams(TemplateParams), Declaration(Declaration), Variables(Variables) {} MDNodeKeyImpl(const MDSubprogram *N) - : Scope(N->getScope()), Name(N->getName()), - LinkageName(N->getLinkageName()), File(N->getFile()), - Line(N->getLine()), Type(N->getType()), + : Scope(N->getRawScope()), Name(N->getName()), + LinkageName(N->getLinkageName()), File(N->getRawFile()), + Line(N->getLine()), Type(N->getRawType()), IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()), - ScopeLine(N->getScopeLine()), ContainingType(N->getContainingType()), + ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()), Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()), Flags(N->getFlags()), IsOptimized(N->isOptimized()), - Function(N->getFunction()), TemplateParams(N->getTemplateParams()), - Declaration(N->getDeclaration()), Variables(N->getVariables()) {} + Function(N->getRawFunction()), + TemplateParams(N->getRawTemplateParams()), + Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {} bool isKeyOf(const MDSubprogram *RHS) const { - return Scope == RHS->getScope() && Name == RHS->getName() && - LinkageName == RHS->getLinkageName() && File == RHS->getFile() && - Line == RHS->getLine() && Type == RHS->getType() && + return Scope == RHS->getRawScope() && Name == RHS->getName() && + LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() && + Line == RHS->getLine() && Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() && IsDefinition == RHS->isDefinition() && ScopeLine == RHS->getScopeLine() && - ContainingType == RHS->getContainingType() && + ContainingType == RHS->getRawContainingType() && Virtuality == RHS->getVirtuality() && VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() && IsOptimized == RHS->isOptimized() && - Function == RHS->getFunction() && - TemplateParams == RHS->getTemplateParams() && - Declaration == RHS->getDeclaration() && - Variables == RHS->getVariables(); + Function == RHS->getRawFunction() && + TemplateParams == RHS->getRawTemplateParams() && + Declaration == RHS->getRawDeclaration() && + Variables == RHS->getRawVariables(); } unsigned getHashValue() const { return hash_combine(Scope, Name, LinkageName, File, Line, Type, @@ -592,11 +596,11 @@ template <> struct MDNodeKeyImpl<MDLexicalBlock> { MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Line, unsigned Column) : Scope(Scope), File(File), Line(Line), Column(Column) {} MDNodeKeyImpl(const MDLexicalBlock *N) - : 
Scope(N->getScope()), File(N->getFile()), Line(N->getLine()), + : Scope(N->getRawScope()), File(N->getRawFile()), Line(N->getLine()), Column(N->getColumn()) {} bool isKeyOf(const MDLexicalBlock *RHS) const { - return Scope == RHS->getScope() && File == RHS->getFile() && + return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Line == RHS->getLine() && Column == RHS->getColumn(); } unsigned getHashValue() const { @@ -612,11 +616,11 @@ template <> struct MDNodeKeyImpl<MDLexicalBlockFile> { MDNodeKeyImpl(Metadata *Scope, Metadata *File, unsigned Discriminator) : Scope(Scope), File(File), Discriminator(Discriminator) {} MDNodeKeyImpl(const MDLexicalBlockFile *N) - : Scope(N->getScope()), File(N->getFile()), + : Scope(N->getRawScope()), File(N->getRawFile()), Discriminator(N->getDiscriminator()) {} bool isKeyOf(const MDLexicalBlockFile *RHS) const { - return Scope == RHS->getScope() && File == RHS->getFile() && + return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Discriminator == RHS->getDiscriminator(); } unsigned getHashValue() const { @@ -633,11 +637,11 @@ template <> struct MDNodeKeyImpl<MDNamespace> { MDNodeKeyImpl(Metadata *Scope, Metadata *File, StringRef Name, unsigned Line) : Scope(Scope), File(File), Name(Name), Line(Line) {} MDNodeKeyImpl(const MDNamespace *N) - : Scope(N->getScope()), File(N->getFile()), Name(N->getName()), + : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getName()), Line(N->getLine()) {} bool isKeyOf(const MDNamespace *RHS) const { - return Scope == RHS->getScope() && File == RHS->getFile() && + return Scope == RHS->getRawScope() && File == RHS->getRawFile() && Name == RHS->getName() && Line == RHS->getLine(); } unsigned getHashValue() const { @@ -651,10 +655,10 @@ template <> struct MDNodeKeyImpl<MDTemplateTypeParameter> { MDNodeKeyImpl(StringRef Name, Metadata *Type) : Name(Name), Type(Type) {} MDNodeKeyImpl(const MDTemplateTypeParameter *N) - : Name(N->getName()), Type(N->getType()) {} + : Name(N->getName()), Type(N->getRawType()) {} bool isKeyOf(const MDTemplateTypeParameter *RHS) const { - return Name == RHS->getName() && Type == RHS->getType(); + return Name == RHS->getName() && Type == RHS->getRawType(); } unsigned getHashValue() const { return hash_combine(Name, Type); } }; @@ -668,12 +672,12 @@ template <> struct MDNodeKeyImpl<MDTemplateValueParameter> { MDNodeKeyImpl(unsigned Tag, StringRef Name, Metadata *Type, Metadata *Value) : Tag(Tag), Name(Name), Type(Type), Value(Value) {} MDNodeKeyImpl(const MDTemplateValueParameter *N) - : Tag(N->getTag()), Name(N->getName()), Type(N->getType()), + : Tag(N->getTag()), Name(N->getName()), Type(N->getRawType()), Value(N->getValue()) {} bool isKeyOf(const MDTemplateValueParameter *RHS) const { return Tag == RHS->getTag() && Name == RHS->getName() && - Type == RHS->getType() && Value == RHS->getValue(); + Type == RHS->getRawType() && Value == RHS->getValue(); } unsigned getHashValue() const { return hash_combine(Tag, Name, Type, Value); } }; @@ -699,21 +703,22 @@ template <> struct MDNodeKeyImpl<MDGlobalVariable> { IsDefinition(IsDefinition), Variable(Variable), StaticDataMemberDeclaration(StaticDataMemberDeclaration) {} MDNodeKeyImpl(const MDGlobalVariable *N) - : Scope(N->getScope()), Name(N->getName()), - LinkageName(N->getLinkageName()), File(N->getFile()), - Line(N->getLine()), Type(N->getType()), + : Scope(N->getRawScope()), Name(N->getName()), + LinkageName(N->getLinkageName()), File(N->getRawFile()), + Line(N->getLine()), Type(N->getRawType()), 
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()), - Variable(N->getVariable()), - StaticDataMemberDeclaration(N->getStaticDataMemberDeclaration()) {} + Variable(N->getRawVariable()), + StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()) {} bool isKeyOf(const MDGlobalVariable *RHS) const { - return Scope == RHS->getScope() && Name == RHS->getName() && - LinkageName == RHS->getLinkageName() && File == RHS->getFile() && - Line == RHS->getLine() && Type == RHS->getType() && + return Scope == RHS->getRawScope() && Name == RHS->getName() && + LinkageName == RHS->getLinkageName() && File == RHS->getRawFile() && + Line == RHS->getLine() && Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() && IsDefinition == RHS->isDefinition() && - Variable == RHS->getVariable() && - StaticDataMemberDeclaration == RHS->getStaticDataMemberDeclaration(); + Variable == RHS->getRawVariable() && + StaticDataMemberDeclaration == + RHS->getRawStaticDataMemberDeclaration(); } unsigned getHashValue() const { return hash_combine(Scope, Name, LinkageName, File, Line, Type, @@ -731,28 +736,24 @@ template <> struct MDNodeKeyImpl<MDLocalVariable> { Metadata *Type; unsigned Arg; unsigned Flags; - Metadata *InlinedAt; MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File, - unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags, - Metadata *InlinedAt) + unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags) : Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), - Arg(Arg), Flags(Flags), InlinedAt(InlinedAt) {} + Arg(Arg), Flags(Flags) {} MDNodeKeyImpl(const MDLocalVariable *N) - : Tag(N->getTag()), Scope(N->getScope()), Name(N->getName()), - File(N->getFile()), Line(N->getLine()), Type(N->getType()), - Arg(N->getArg()), Flags(N->getFlags()), InlinedAt(N->getInlinedAt()) {} + : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()), + File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()), + Arg(N->getArg()), Flags(N->getFlags()) {} bool isKeyOf(const MDLocalVariable *RHS) const { - return Tag == RHS->getTag() && Scope == RHS->getScope() && - Name == RHS->getName() && File == RHS->getFile() && - Line == RHS->getLine() && Type == RHS->getType() && - Arg == RHS->getArg() && Flags == RHS->getFlags() && - InlinedAt == RHS->getInlinedAt(); + return Tag == RHS->getTag() && Scope == RHS->getRawScope() && + Name == RHS->getName() && File == RHS->getRawFile() && + Line == RHS->getLine() && Type == RHS->getRawType() && + Arg == RHS->getArg() && Flags == RHS->getFlags(); } unsigned getHashValue() const { - return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags, - InlinedAt); + return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags); } }; @@ -785,15 +786,15 @@ template <> struct MDNodeKeyImpl<MDObjCProperty> { : Name(Name), File(File), Line(Line), GetterName(GetterName), SetterName(SetterName), Attributes(Attributes), Type(Type) {} MDNodeKeyImpl(const MDObjCProperty *N) - : Name(N->getName()), File(N->getFile()), Line(N->getLine()), + : Name(N->getName()), File(N->getRawFile()), Line(N->getLine()), GetterName(N->getGetterName()), SetterName(N->getSetterName()), - Attributes(N->getAttributes()), Type(N->getType()) {} + Attributes(N->getAttributes()), Type(N->getRawType()) {} bool isKeyOf(const MDObjCProperty *RHS) const { - return Name == RHS->getName() && File == RHS->getFile() && + return Name == RHS->getName() && File == RHS->getRawFile() && Line == RHS->getLine() && GetterName == 
RHS->getGetterName() && SetterName == RHS->getSetterName() && - Attributes == RHS->getAttributes() && Type == RHS->getType(); + Attributes == RHS->getAttributes() && Type == RHS->getRawType(); } unsigned getHashValue() const { return hash_combine(Name, File, Line, GetterName, SetterName, Attributes, @@ -812,12 +813,12 @@ template <> struct MDNodeKeyImpl<MDImportedEntity> { StringRef Name) : Tag(Tag), Scope(Scope), Entity(Entity), Line(Line), Name(Name) {} MDNodeKeyImpl(const MDImportedEntity *N) - : Tag(N->getTag()), Scope(N->getScope()), Entity(N->getEntity()), + : Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()), Line(N->getLine()), Name(N->getName()) {} bool isKeyOf(const MDImportedEntity *RHS) const { - return Tag == RHS->getTag() && Scope == RHS->getScope() && - Entity == RHS->getEntity() && Line == RHS->getLine() && + return Tag == RHS->getTag() && Scope == RHS->getRawScope() && + Entity == RHS->getRawEntity() && Line == RHS->getLine() && Name == RHS->getName(); } unsigned getHashValue() const { diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 9a365d1..6870032 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -293,7 +293,7 @@ public: Pass(PT_PassManager, ID), PMDataManager() { } // Delete on the fly managers. - virtual ~MPPassManager() { + ~MPPassManager() override { for (std::map<Pass *, FunctionPassManagerImpl *>::iterator I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); I != E; ++I) { diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 0ad3c5c..93098b9 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -446,6 +446,10 @@ void MDNode::makeUniqued() { assert(isTemporary() && "Expected this to be temporary"); assert(!isResolved() && "Expected this to be unresolved"); + // Enable uniquing callbacks. + for (auto &Op : mutable_operands()) + Op.reset(Op.get(), this); + // Make this 'uniqued'. Storage = Uniqued; if (!countUnresolvedOperands()) @@ -1035,7 +1039,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { // Handle 'dbg' as a special case since it is not stored in the hash table. if (KindID == LLVMContext::MD_dbg) { - DbgLoc = DebugLoc::getFromDILocation(Node); + DbgLoc = DebugLoc(Node); return; } @@ -1114,7 +1118,7 @@ void Instruction::getAllMetadataImpl( Result.clear(); // Handle 'dbg' as a special case since it is not stored in the hash table. - if (!DbgLoc.isUnknown()) { + if (DbgLoc) { Result.push_back( std::make_pair((unsigned)LLVMContext::MD_dbg, DbgLoc.getAsMDNode())); if (!hasMetadataHashEntry()) return; diff --git a/lib/IR/UseListOrder.cpp b/lib/IR/UseListOrder.cpp deleted file mode 100644 index d064e67..0000000 --- a/lib/IR/UseListOrder.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===- UseListOrder.cpp - Implement Use List Order ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implement structures and command-line options for preserving use-list order. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/IR/UseListOrder.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -static cl::opt<bool> PreserveBitcodeUseListOrder( - "preserve-bc-use-list-order", - cl::desc("Experimental support to preserve bitcode use-list order."), - cl::init(false), cl::Hidden); - -static cl::opt<bool> PreserveAssemblyUseListOrder( - "preserve-ll-use-list-order", - cl::desc("Experimental support to preserve assembly use-list order."), - cl::init(false), cl::Hidden); - -bool llvm::shouldPreserveBitcodeUseListOrder() { - return PreserveBitcodeUseListOrder; -} - -bool llvm::shouldPreserveAssemblyUseListOrder() { - return PreserveAssemblyUseListOrder; -} - -void llvm::setPreserveBitcodeUseListOrder(bool ShouldPreserve) { - PreserveBitcodeUseListOrder = ShouldPreserve; -} - -void llvm::setPreserveAssemblyUseListOrder(bool ShouldPreserve) { - PreserveAssemblyUseListOrder = ShouldPreserve; -} diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 78bfca4..f6eb427 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -525,7 +525,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, // Return values from call sites specifically marked as dereferenceable are // also okay. - if (ImmutableCallSite CS = V) { + if (auto CS = ImmutableCallSite(V)) { if (uint64_t Bytes = CS.getDereferenceableBytes(0)) { Type *Ty = V->getType()->getPointerElementType(); if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes) @@ -595,7 +595,7 @@ bool Value::isDereferenceablePointer(const DataLayout &DL) const { APInt DerefBytes(Offset.getBitWidth(), 0); if (const Argument *A = dyn_cast<Argument>(BV)) DerefBytes = A->getDereferenceableBytes(); - else if (ImmutableCallSite CS = BV) + else if (auto CS = ImmutableCallSite(BV)) DerefBytes = CS.getDereferenceableBytes(0); if (DerefBytes.getBoolValue() && Offset.isNonNegative()) { diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index fcf48c4..fba78e9 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -87,10 +87,9 @@ struct VerifierSupport { /// \brief Track the brokenness of the module while recursively visiting. bool Broken; - bool EverBroken; explicit VerifierSupport(raw_ostream &OS) - : OS(OS), M(nullptr), Broken(false), EverBroken(false) {} + : OS(OS), M(nullptr), Broken(false) {} private: void Write(const Value *V) { @@ -111,6 +110,10 @@ private: OS << '\n'; } + template <class T> void Write(const MDTupleTypedArrayWrapper<T> &MD) { + Write(MD.get()); + } + void Write(const NamedMDNode *NMD) { if (!NMD) return; @@ -145,7 +148,7 @@ public: /// something is not correct. void CheckFailed(const Twine &Message) { OS << Message << '\n'; - EverBroken = Broken = true; + Broken = true; } /// \brief A check failed (with values to print). @@ -175,6 +178,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { /// \brief Keep track of the metadata nodes that have been checked already. SmallPtrSet<const Metadata *, 32> MDNodes; + /// \brief Track unresolved string-based type references. + SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs; + /// \brief The personality function referenced by the LandingPadInsts. /// All LandingPadInsts within the same function must use the same /// personality function. @@ -268,8 +274,8 @@ public: visitModuleFlags(M); visitModuleIdents(M); - // Verify debug info last. - verifyDebugInfo(); + // Verify type references last.
+ verifyTypeRefs(); return !Broken; } @@ -296,8 +302,37 @@ private: void visitBasicBlock(BasicBlock &BB); void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty); + template <class Ty> bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); #include "llvm/IR/Metadata.def" + void visitMDScope(const MDScope &N); + void visitMDDerivedTypeBase(const MDDerivedTypeBase &N); + void visitMDVariable(const MDVariable &N); + void visitMDLexicalBlockBase(const MDLexicalBlockBase &N); + void visitMDTemplateParameter(const MDTemplateParameter &N); + + void visitTemplateParams(const MDNode &N, const Metadata &RawParams); + + /// \brief Check for a valid string-based type reference. + /// + /// Checks if \c MD is a string-based type reference. If it is, keeps track + /// of it (and its user, \c N) for error messages later. + bool isValidUUID(const MDNode &N, const Metadata *MD); + + /// \brief Check for a valid type reference. + /// + /// Checks for subclasses of \a MDType, or \a isValidUUID(). + bool isTypeRef(const MDNode &N, const Metadata *MD); + + /// \brief Check for a valid scope reference. + /// + /// Checks for subclasses of \a MDScope, or \a isValidUUID(). + bool isScopeRef(const MDNode &N, const Metadata *MD); + + /// \brief Check for a valid debug info reference. + /// + /// Checks for subclasses of \a DebugNode, or \a isValidUUID(). + bool isDIRef(const MDNode &N, const Metadata *MD); // InstVisitor overrides... using InstVisitor<Verifier>::visit; @@ -371,9 +406,11 @@ private: void verifyFrameRecoverIndices(); // Module-level debug info verification... - void verifyDebugInfo(); - void processInstructions(DebugInfoFinder &Finder); - void processCallInst(DebugInfoFinder &Finder, const CallInst &CI); + void verifyTypeRefs(); + template <class MapTy> + void verifyBitPieceExpression(const DbgInfoIntrinsic &I, + const MapTy &TypeRefs); + void visitUnresolvedTypeRef(const MDString *S, const MDNode *N); }; } // End anonymous namespace @@ -566,13 +603,14 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) { MDNode *MD = NMD.getOperand(i); - if (!MD) - continue; if (NMD.getName() == "llvm.dbg.cu") { - Assert(isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD); + Assert(MD && isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD); } + if (!MD) + continue; + visitMDNode(*MD); } } @@ -658,6 +696,58 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) { visitValueAsMetadata(*V, F); } +bool Verifier::isValidUUID(const MDNode &N, const Metadata *MD) { + auto *S = dyn_cast<MDString>(MD); + if (!S) + return false; + if (S->getString().empty()) + return false; + + // Keep track of names of types referenced via UUID so we can check that they + // actually exist. + UnresolvedTypeRefs.insert(std::make_pair(S, &N)); + return true; +} + +/// \brief Check if a value can be a reference to a type. +bool Verifier::isTypeRef(const MDNode &N, const Metadata *MD) { + return !MD || isValidUUID(N, MD) || isa<MDType>(MD); +} + +/// \brief Check if a value can be a ScopeRef. +bool Verifier::isScopeRef(const MDNode &N, const Metadata *MD) { + return !MD || isValidUUID(N, MD) || isa<MDScope>(MD); +} + +/// \brief Check if a value can be a debug info ref. 
+bool Verifier::isDIRef(const MDNode &N, const Metadata *MD) { + return !MD || isValidUUID(N, MD) || isa<DebugNode>(MD); +} + +template <class Ty> +bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) { + for (Metadata *MD : N.operands()) { + if (MD) { + if (!isa<Ty>(MD)) + return false; + } else { + if (!AllowNull) + return false; + } + } + return true; +} + +template <class Ty> +bool isValidMetadataArray(const MDTuple &N) { + return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ false); +} + +template <class Ty> +bool isValidMetadataNullArray(const MDTuple &N) { + return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ true); +} + void Verifier::visitMDLocation(const MDLocation &N) { Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), "location requires a valid scope", &N, N.getRawScope()); @@ -669,8 +759,14 @@ void Verifier::visitGenericDebugNode(const GenericDebugNode &N) { Assert(N.getTag(), "invalid tag", &N); } +void Verifier::visitMDScope(const MDScope &N) { + if (auto *F = N.getRawFile()) + Assert(isa<MDFile>(F), "invalid file", &N, F); +} + void Verifier::visitMDSubrange(const MDSubrange &N) { Assert(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N); + Assert(N.getCount() >= -1, "invalid subrange count", &N); } void Verifier::visitMDEnumerator(const MDEnumerator &N) { @@ -683,7 +779,39 @@ void Verifier::visitMDBasicType(const MDBasicType &N) { "invalid tag", &N); } +void Verifier::visitMDDerivedTypeBase(const MDDerivedTypeBase &N) { + // Common scope checks. + visitMDScope(N); + + Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); + Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, + N.getBaseType()); + + // FIXME: Sink this into the subclass verifies. + if (!N.getFile() || N.getFile()->getFilename().empty()) { + // Check whether the filename is allowed to be empty. + uint16_t Tag = N.getTag(); + Assert( + Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_pointer_type || + Tag == dwarf::DW_TAG_ptr_to_member_type || + Tag == dwarf::DW_TAG_reference_type || + Tag == dwarf::DW_TAG_rvalue_reference_type || + Tag == dwarf::DW_TAG_restrict_type || + Tag == dwarf::DW_TAG_array_type || + Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_subroutine_type || + Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef, + "derived/composite type requires a filename", &N, N.getFile()); + } +} + void Verifier::visitMDDerivedType(const MDDerivedType &N) { + // Common derived type checks. 
+ visitMDDerivedTypeBase(N); + Assert(N.getTag() == dwarf::DW_TAG_typedef || N.getTag() == dwarf::DW_TAG_pointer_type || N.getTag() == dwarf::DW_TAG_ptr_to_member_type || @@ -696,9 +824,30 @@ void Verifier::visitMDDerivedType(const MDDerivedType &N) { N.getTag() == dwarf::DW_TAG_inheritance || N.getTag() == dwarf::DW_TAG_friend, "invalid tag", &N); + if (N.getTag() == dwarf::DW_TAG_ptr_to_member_type) { + Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N, + N.getExtraData()); + } +} + +static bool hasConflictingReferenceFlags(unsigned Flags) { + return (Flags & DebugNode::FlagLValueReference) && + (Flags & DebugNode::FlagRValueReference); +} + +void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) { + auto *Params = dyn_cast<MDTuple>(&RawParams); + Assert(Params, "invalid template params", &N, &RawParams); + for (Metadata *Op : Params->operands()) { + Assert(Op && isa<MDTemplateParameter>(Op), "invalid template parameter", &N, + Params, Op); + } } void Verifier::visitMDCompositeType(const MDCompositeType &N) { + // Common derived type checks. + visitMDDerivedTypeBase(N); + Assert(N.getTag() == dwarf::DW_TAG_array_type || N.getTag() == dwarf::DW_TAG_structure_type || N.getTag() == dwarf::DW_TAG_union_type || @@ -706,10 +855,29 @@ void Verifier::visitMDCompositeType(const MDCompositeType &N) { N.getTag() == dwarf::DW_TAG_subroutine_type || N.getTag() == dwarf::DW_TAG_class_type, "invalid tag", &N); + + Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()), + "invalid composite elements", &N, N.getRawElements()); + Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N, + N.getRawVTableHolder()); + Assert(!N.getRawElements() || isa<MDTuple>(N.getRawElements()), + "invalid composite elements", &N, N.getRawElements()); + Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", + &N); + if (auto *Params = N.getRawTemplateParams()) + visitTemplateParams(N, *Params); } void Verifier::visitMDSubroutineType(const MDSubroutineType &N) { Assert(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N); + if (auto *Types = N.getRawTypeArray()) { + Assert(isa<MDTuple>(Types), "invalid composite elements", &N, Types); + for (Metadata *Ty : N.getTypeArray()->operands()) { + Assert(isTypeRef(N, Ty), "invalid subroutine type ref", &N, Types, Ty); + } + } + Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", + &N); } void Verifier::visitMDFile(const MDFile &N) { @@ -718,45 +886,195 @@ void Verifier::visitMDFile(const MDFile &N) { void Verifier::visitMDCompileUnit(const MDCompileUnit &N) { Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N); + + // Don't bother verifying the compilation directory or producer string + // as those could be empty. 
+ Assert(N.getRawFile() && isa<MDFile>(N.getRawFile()), + "invalid file", &N, N.getRawFile()); + Assert(!N.getFile()->getFilename().empty(), "invalid filename", &N, + N.getFile()); + + if (auto *Array = N.getRawEnumTypes()) { + Assert(isa<MDTuple>(Array), "invalid enum list", &N, Array); + for (Metadata *Op : N.getEnumTypes()->operands()) { + auto *Enum = dyn_cast_or_null<MDCompositeType>(Op); + Assert(Enum && Enum->getTag() == dwarf::DW_TAG_enumeration_type, + "invalid enum type", &N, N.getEnumTypes(), Op); + } + } + if (auto *Array = N.getRawRetainedTypes()) { + Assert(isa<MDTuple>(Array), "invalid retained type list", &N, Array); + for (Metadata *Op : N.getRetainedTypes()->operands()) { + Assert(Op && isa<MDType>(Op), "invalid retained type", &N, Op); + } + } + if (auto *Array = N.getRawSubprograms()) { + Assert(isa<MDTuple>(Array), "invalid subprogram list", &N, Array); + for (Metadata *Op : N.getSubprograms()->operands()) { + Assert(Op && isa<MDSubprogram>(Op), "invalid subprogram ref", &N, Op); + } + } + if (auto *Array = N.getRawGlobalVariables()) { + Assert(isa<MDTuple>(Array), "invalid global variable list", &N, Array); + for (Metadata *Op : N.getGlobalVariables()->operands()) { + Assert(Op && isa<MDGlobalVariable>(Op), "invalid global variable ref", &N, + Op); + } + } + if (auto *Array = N.getRawImportedEntities()) { + Assert(isa<MDTuple>(Array), "invalid imported entity list", &N, Array); + for (Metadata *Op : N.getImportedEntities()->operands()) { + Assert(Op && isa<MDImportedEntity>(Op), "invalid imported entity ref", &N, + Op); + } + } } void Verifier::visitMDSubprogram(const MDSubprogram &N) { Assert(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N); + Assert(isScopeRef(N, N.getRawScope()), "invalid scope", &N, N.getRawScope()); + if (auto *T = N.getRawType()) + Assert(isa<MDSubroutineType>(T), "invalid subroutine type", &N, T); + Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N, + N.getRawContainingType()); + if (auto *RawF = N.getRawFunction()) { + auto *FMD = dyn_cast<ConstantAsMetadata>(RawF); + auto *F = FMD ? FMD->getValue() : nullptr; + auto *FT = F ? dyn_cast<PointerType>(F->getType()) : nullptr; + Assert(F && FT && isa<FunctionType>(FT->getElementType()), + "invalid function", &N, F, FT); + } + if (auto *Params = N.getRawTemplateParams()) + visitTemplateParams(N, *Params); + if (auto *S = N.getRawDeclaration()) { + Assert(isa<MDSubprogram>(S) && !cast<MDSubprogram>(S)->isDefinition(), + "invalid subprogram declaration", &N, S); + } + if (auto *RawVars = N.getRawVariables()) { + auto *Vars = dyn_cast<MDTuple>(RawVars); + Assert(Vars, "invalid variable list", &N, RawVars); + for (Metadata *Op : Vars->operands()) { + Assert(Op && isa<MDLocalVariable>(Op), "invalid local variable", &N, Vars, + Op); + } + } + Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", + &N); + + auto *F = N.getFunction(); + if (!F) + return; + + // Check that all !dbg attachments lead back to N (or, at least, another + // subprogram that describes the same function). + // + // FIXME: Check this incrementally while visiting !dbg attachments. + // FIXME: Only check when N is the canonical subprogram for F. + SmallPtrSet<const MDNode *, 32> Seen; + for (auto &BB : *F) + for (auto &I : BB) { + // Be careful about using MDLocation here since we might be dealing with + // broken code (this is the Verifier after all).
+ MDLocation *DL = + dyn_cast_or_null<MDLocation>(I.getDebugLoc().getAsMDNode()); + if (!DL) + continue; + if (!Seen.insert(DL).second) + continue; + + MDLocalScope *Scope = DL->getInlinedAtScope(); + if (Scope && !Seen.insert(Scope).second) + continue; + + MDSubprogram *SP = Scope ? Scope->getSubprogram() : nullptr; + if (SP && !Seen.insert(SP).second) + continue; + + // FIXME: Once N is canonical, check "SP == &N". + Assert(SP->describes(F), + "!dbg attachment points at wrong subprogram for function", &N, F, + &I, DL, Scope, SP); + } } -void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) { +void Verifier::visitMDLexicalBlockBase(const MDLexicalBlockBase &N) { Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); + Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), + "invalid local scope", &N, N.getRawScope()); +} + +void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) { + visitMDLexicalBlockBase(N); + + Assert(N.getLine() || !N.getColumn(), + "cannot have column info without line info", &N); } void Verifier::visitMDLexicalBlockFile(const MDLexicalBlockFile &N) { - Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N); + visitMDLexicalBlockBase(N); } void Verifier::visitMDNamespace(const MDNamespace &N) { Assert(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N); + if (auto *S = N.getRawScope()) + Assert(isa<MDScope>(S), "invalid scope ref", &N, S); +} + +void Verifier::visitMDTemplateParameter(const MDTemplateParameter &N) { + Assert(isTypeRef(N, N.getType()), "invalid type ref", &N, N.getType()); } void Verifier::visitMDTemplateTypeParameter(const MDTemplateTypeParameter &N) { + visitMDTemplateParameter(N); + Assert(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag", &N); } void Verifier::visitMDTemplateValueParameter( const MDTemplateValueParameter &N) { + visitMDTemplateParameter(N); + Assert(N.getTag() == dwarf::DW_TAG_template_value_parameter || N.getTag() == dwarf::DW_TAG_GNU_template_template_param || N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack, "invalid tag", &N); } +void Verifier::visitMDVariable(const MDVariable &N) { + if (auto *S = N.getRawScope()) + Assert(isa<MDScope>(S), "invalid scope", &N, S); + Assert(isTypeRef(N, N.getRawType()), "invalid type ref", &N, N.getRawType()); + if (auto *F = N.getRawFile()) + Assert(isa<MDFile>(F), "invalid file", &N, F); +} + void Verifier::visitMDGlobalVariable(const MDGlobalVariable &N) { + // Checks common to all variables. + visitMDVariable(N); + Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N); + Assert(!N.getName().empty(), "missing global variable name", &N); + if (auto *V = N.getRawVariable()) { + Assert(isa<ConstantAsMetadata>(V) && + !isa<Function>(cast<ConstantAsMetadata>(V)->getValue()), + "invalid global variable ref", &N, V); + } + if (auto *Member = N.getRawStaticDataMemberDeclaration()) { + Assert(isa<MDDerivedType>(Member), "invalid static data member declaration", + &N, Member); + } } void Verifier::visitMDLocalVariable(const MDLocalVariable &N) { + // Checks common to all variables.
+ visitMDVariable(N); + Assert(N.getTag() == dwarf::DW_TAG_auto_variable || N.getTag() == dwarf::DW_TAG_arg_variable, "invalid tag", &N); + Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()), + "local variable requires a valid scope", &N, N.getRawScope()); } void Verifier::visitMDExpression(const MDExpression &N) { @@ -765,12 +1083,20 @@ void Verifier::visitMDExpression(const MDExpression &N) { void Verifier::visitMDObjCProperty(const MDObjCProperty &N) { Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N); + if (auto *T = N.getRawType()) + Assert(isa<MDType>(T), "invalid type ref", &N, T); + if (auto *F = N.getRawFile()) + Assert(isa<MDFile>(F), "invalid file", &N, F); } void Verifier::visitMDImportedEntity(const MDImportedEntity &N) { Assert(N.getTag() == dwarf::DW_TAG_imported_module || N.getTag() == dwarf::DW_TAG_imported_declaration, "invalid tag", &N); + if (auto *S = N.getRawScope()) + Assert(isa<MDScope>(S), "invalid scope for imported entity", &N, S); + Assert(isDIRef(N, N.getEntity()), "invalid imported entity", &N, + N.getEntity()); } void Verifier::visitComdat(const Comdat &C) { @@ -2133,7 +2459,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = - GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs); + GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); Assert(GEP.getType()->getScalarType()->isPointerTy() && @@ -2214,9 +2540,7 @@ void Verifier::visitRangeMetadata(Instruction& I, void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType()); Assert(PTy, "Load operand must be a pointer.", &LI); - Type *ElTy = PTy->getElementType(); - Assert(ElTy == LI.getType(), - "Load result type does not match pointer operand type!", &LI, ElTy); + Type *ElTy = LI.getType(); Assert(LI.getAlignment() <= Value::MaximumAlignment, "huge alignment values are unsupported", &LI); if (LI.isAtomic()) { @@ -2885,6 +3209,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { Assert(!SawFrameEscape, "multiple calls to llvm.frameescape in one function", &CI); for (Value *Arg : CI.arg_operands()) { + if (isa<ConstantPointerNull>(Arg)) + continue; // Null values are allowed as placeholders. auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); Assert(AI && AI->isStaticAlloca(), "llvm.frameescape only accepts static allocas", &CI); @@ -2909,16 +3235,11 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { break; } - case Intrinsic::eh_unwindhelp: { - auto *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts()); - Assert(AI && AI->isStaticAlloca(), - "llvm.eh.unwindhelp requires a static alloca", &CI); - break; - } - case Intrinsic::experimental_gc_statepoint: Assert(!CI.isInlineAsm(), "gc.statepoint support for inline assembly unimplemented", &CI); + Assert(CI.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", &CI); VerifyStatepoint(ImmutableCallSite(&CI)); break; @@ -2926,6 +3247,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { case Intrinsic::experimental_gc_result_float: case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { + Assert(CI.getParent()->getParent()->hasGC(), + "Enclosing function does not use GC.", &CI); // Are we tied to a statepoint properly? 
CallSite StatepointCS(CI.getArgOperand(0)); const Function *StatepointFn = @@ -3035,6 +3358,25 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { }; } +/// \brief Carefully grab the subprogram from a local scope. +/// +/// This carefully grabs the subprogram from a local scope, avoiding the +/// built-in assertions that would typically fire. +static MDSubprogram *getSubprogram(Metadata *LocalScope) { + if (!LocalScope) + return nullptr; + + if (auto *SP = dyn_cast<MDSubprogram>(LocalScope)) + return SP; + + if (auto *LB = dyn_cast<MDLexicalBlockBase>(LocalScope)) + return getSubprogram(LB->getRawScope()); + + // Just return null; broken scope chains are checked elsewhere. + assert(!isa<MDLocalScope>(LocalScope) && "Unknown type of local scope"); + return nullptr; +} + template <class DbgIntrinsicTy> void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) { auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata(); @@ -3047,61 +3389,145 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) { Assert(isa<MDExpression>(DII.getRawExpression()), "invalid llvm.dbg." + Kind + " intrinsic expression", &DII, DII.getRawExpression()); -} -void Verifier::verifyDebugInfo() { - // Run the debug info verifier only if the regular verifier succeeds, since - // sometimes checks that have already failed will cause crashes here. - if (EverBroken || !VerifyDebugInfo) - return; + // Ignore broken !dbg attachments; they're checked elsewhere. + if (MDNode *N = DII.getDebugLoc().getAsMDNode()) + if (!isa<MDLocation>(N)) + return; - DebugInfoFinder Finder; - Finder.processModule(*M); - processInstructions(Finder); + BasicBlock *BB = DII.getParent(); + Function *F = BB ? BB->getParent() : nullptr; + + // The scopes for variables and !dbg attachments must agree. + MDLocalVariable *Var = DII.getVariable(); + MDLocation *Loc = DII.getDebugLoc(); + Assert(Loc, "llvm.dbg." + Kind + " intrinsic requires a !dbg attachment", + &DII, BB, F); + + MDSubprogram *VarSP = getSubprogram(Var->getRawScope()); + MDSubprogram *LocSP = getSubprogram(Loc->getRawScope()); + if (!VarSP || !LocSP) + return; // Broken scope chains are checked elsewhere. + + Assert(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind + + " variable and !dbg attachment", + &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc, + Loc->getScope()->getSubprogram()); +} + +template <class MapTy> +static uint64_t getVariableSize(const MDLocalVariable &V, const MapTy &Map) { + // Be careful of broken types (checked elsewhere). + const Metadata *RawType = V.getRawType(); + while (RawType) { + // Try to get the size directly. + if (auto *T = dyn_cast<MDType>(RawType)) + if (uint64_t Size = T->getSizeInBits()) + return Size; + + if (auto *DT = dyn_cast<MDDerivedType>(RawType)) { + // Look at the base type. + RawType = DT->getRawBaseType(); + continue; + } - // Verify Debug Info. - // - // NOTE: The loud braces are necessary for MSVC compatibility. - for (DICompileUnit CU : Finder.compile_units()) { - Assert(CU.Verify(), "DICompileUnit does not Verify!", CU); - } - for (DISubprogram S : Finder.subprograms()) { - Assert(S.Verify(), "DISubprogram does not Verify!", S); - } - for (DIGlobalVariable GV : Finder.global_variables()) { - Assert(GV.Verify(), "DIGlobalVariable does not Verify!", GV); - } - for (DIType T : Finder.types()) { - Assert(T.Verify(), "DIType does not Verify!", T); + if (auto *S = dyn_cast<MDString>(RawType)) { + // Don't error on missing types (checked elsewhere). 
+ RawType = Map.lookup(S); + continue; + } + + // Missing type or size. + break; } - for (DIScope S : Finder.scopes()) { - Assert(S.Verify(), "DIScope does not Verify!", S); + + // Fail gracefully. + return 0; +} + +template <class MapTy> +void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I, + const MapTy &TypeRefs) { + MDLocalVariable *V; + MDExpression *E; + if (auto *DVI = dyn_cast<DbgValueInst>(&I)) { + V = dyn_cast_or_null<MDLocalVariable>(DVI->getRawVariable()); + E = dyn_cast_or_null<MDExpression>(DVI->getRawExpression()); + } else { + auto *DDI = cast<DbgDeclareInst>(&I); + V = dyn_cast_or_null<MDLocalVariable>(DDI->getRawVariable()); + E = dyn_cast_or_null<MDExpression>(DDI->getRawExpression()); } + + // We don't know whether this intrinsic verified correctly. + if (!V || !E || !E->isValid()) + return; + + // Nothing to do if this isn't a bit piece expression. + if (!E->isBitPiece()) + return; + + // If there's no size, the type is broken, but that should be checked + // elsewhere. + uint64_t VarSize = getVariableSize(*V, TypeRefs); + if (!VarSize) + return; + + unsigned PieceSize = E->getBitPieceSize(); + unsigned PieceOffset = E->getBitPieceOffset(); + Assert(PieceSize + PieceOffset <= VarSize, + "piece is larger than or outside of variable", &I, V, E); + Assert(PieceSize != VarSize, "piece covers entire variable", &I, V, E); } -void Verifier::processInstructions(DebugInfoFinder &Finder) { - for (const Function &F : *M) - for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg)) - Finder.processLocation(*M, DILocation(MD)); - if (const CallInst *CI = dyn_cast<CallInst>(&*I)) - processCallInst(Finder, *CI); - } +void Verifier::visitUnresolvedTypeRef(const MDString *S, const MDNode *N) { + // This is in its own function so we get an error for each bad type ref (not + // just the first). + Assert(false, "unresolved type ref", S, N); } -void Verifier::processCallInst(DebugInfoFinder &Finder, const CallInst &CI) { - if (Function *F = CI.getCalledFunction()) - if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) - switch (ID) { - case Intrinsic::dbg_declare: - Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI)); - break; - case Intrinsic::dbg_value: - Finder.processValue(*M, cast<DbgValueInst>(&CI)); - break; - default: - break; - } +void Verifier::verifyTypeRefs() { + auto *CUs = M->getNamedMetadata("llvm.dbg.cu"); + if (!CUs) + return; + + // Visit all the compile units again to map the type references. + SmallDenseMap<const MDString *, const MDType *, 32> TypeRefs; + for (auto *CU : CUs->operands()) + if (auto Ts = cast<MDCompileUnit>(CU)->getRetainedTypes()) + for (MDType *Op : Ts) + if (auto *T = dyn_cast<MDCompositeType>(Op)) + if (auto *S = T->getRawIdentifier()) { + UnresolvedTypeRefs.erase(S); + TypeRefs.insert(std::make_pair(S, T)); + } + + // Verify debug info intrinsic bit piece expressions. This needs a second + // pass through the instructions, since we haven't built TypeRefs yet when + // verifying functions, and simply queuing the DbgInfoIntrinsics to evaluate + // later/now would queue up some that could be later deleted. + for (const Function &F : *M) + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) + verifyBitPieceExpression(*DII, TypeRefs); + + // Return early if all typerefs were resolved. + if (UnresolvedTypeRefs.empty()) + return; + + // Sort the unresolved references by name so the output is deterministic.
+ typedef std::pair<const MDString *, const MDNode *> TypeRef; + SmallVector<TypeRef, 32> Unresolved(UnresolvedTypeRefs.begin(), + UnresolvedTypeRefs.end()); + std::sort(Unresolved.begin(), Unresolved.end(), + [](const TypeRef &LHS, const TypeRef &RHS) { + return LHS.first->getString() < RHS.first->getString(); + }); + + // Visit the unresolved refs (printing out the errors). + for (const TypeRef &TR : Unresolved) + visitUnresolvedTypeRef(TR.first, TR.second); } //===----------------------------------------------------------------------===// diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index a6f980b..b4a7011 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -38,7 +38,6 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Signals.h" @@ -215,7 +214,8 @@ bool LTOCodeGenerator::writeMergedModules(const char *path, } // write bitcode to it - WriteBitcodeToFile(IRLinker.getModule(), Out.os()); + WriteBitcodeToFile(IRLinker.getModule(), Out.os(), + /* ShouldPreserveUseListOrder */ true); Out.os().close(); if (Out.os().has_error()) { @@ -566,24 +566,20 @@ bool LTOCodeGenerator::optimize(bool DisableInline, return true; } -bool LTOCodeGenerator::compileOptimized(raw_ostream &out, std::string &errMsg) { +bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out, + std::string &errMsg) { if (!this->determineTarget(errMsg)) return false; Module *mergedModule = IRLinker.getModule(); - // Mark which symbols can not be internalized - this->applyScopeRestrictions(); - legacy::PassManager codeGenPasses; - formatted_raw_ostream Out(out); - // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. 
codeGenPasses.add(createObjCARCContractPass()); - if (TargetMach->addPassesToEmitFile(codeGenPasses, Out, + if (TargetMach->addPassesToEmitFile(codeGenPasses, out, TargetMachine::CGFT_ObjectFile)) { errMsg = "target file type not supported"; return false; diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 49aa97d..5cdbca6 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -267,7 +267,7 @@ LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { Constant *cn = gvn->getInitializer(); if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) { if (ca->isCString()) { - name = ".objc_class_name_" + ca->getAsCString().str(); + name = (".objc_class_name_" + ca->getAsCString()).str(); return true; } } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 21edc50..03ab9fb 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1269,15 +1269,11 @@ void ModuleLinker::stripReplacedSubprograms() { if (!CompileUnits) return; for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) { - DICompileUnit CU(CompileUnits->getOperand(I)); + DICompileUnit CU = cast<MDCompileUnit>(CompileUnits->getOperand(I)); assert(CU && "Expected valid compile unit"); - DITypedArray<DISubprogram> SPs(CU.getSubprograms()); - assert(SPs && "Expected valid subprogram array"); - - for (unsigned S = 0, SE = SPs.getNumElements(); S != SE; ++S) { - DISubprogram SP = SPs.getElement(S); - if (!SP || !SP.getFunction() || !Functions.count(SP.getFunction())) + for (MDSubprogram *SP : CU->getSubprograms()) { + if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction())) continue; // Prevent DebugInfoFinder from tagging this as the canonical subprogram, diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index c99a3ee..8cb01c4 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -79,17 +79,6 @@ public: uint8_t other, uint32_t shndx, bool Reserved); }; -struct ELFRelocationEntry { - uint64_t Offset; // Where is the relocation. - const MCSymbol *Symbol; // The symbol to relocate with. - unsigned Type; // The type of the relocation. - uint64_t Addend; // The addend to use. - - ELFRelocationEntry(uint64_t Offset, const MCSymbol *Symbol, unsigned Type, - uint64_t Addend) - : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {} -}; - class ELFObjectWriter : public MCObjectWriter { FragmentWriter FWriter; @@ -103,22 +92,13 @@ class ELFObjectWriter : public MCObjectWriter { static bool isLocal(const MCSymbolData &Data, bool isUsedInReloc); static bool IsELFMetaDataSection(const MCSectionData &SD); static uint64_t DataSectionSize(const MCSectionData &SD); - static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, - const MCSectionData &SD); static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, const MCSectionData &SD); - void WriteDataSectionData(MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCSectionELF &Section); - - /*static bool isFixupKindX86RIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; - }*/ + void writeDataSectionData(MCAssembler &Asm, const MCAsmLayout &Layout, + const MCSectionData &SD); - /// ELFSymbolData - Helper struct for containing some precomputed - /// information on symbols. + /// Helper struct for containing some precomputed information on symbols. 
struct ELFSymbolData { MCSymbolData *SymbolData; uint64_t StringIndex; @@ -185,7 +165,7 @@ class ELFObjectWriter : public MCObjectWriter { } public: - ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS, + ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) : MCObjectWriter(OS, IsLittleEndian), FWriter(IsLittleEndian), TargetObjectWriter(MOTW), NeedsGOT(false) {} @@ -204,7 +184,7 @@ class ELFObjectWriter : public MCObjectWriter { MCObjectWriter::reset(); } - virtual ~ELFObjectWriter(); + ~ELFObjectWriter() override; void WriteWord(uint64_t W) { if (is64Bit()) @@ -218,7 +198,6 @@ class ELFObjectWriter : public MCObjectWriter { } void WriteHeader(const MCAssembler &Asm, - uint64_t SectionDataSize, unsigned NumberOfSections); void WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, @@ -245,8 +224,6 @@ class ELFObjectWriter : public MCObjectWriter { typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy; // Map from a signature symbol to the group section typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy; - // Map from a section to the section with the relocations - typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy; // Map from a section to its offset typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy; @@ -255,23 +232,18 @@ class ELFObjectWriter : public MCObjectWriter { /// \param Asm - The assembler. /// \param SectionIndexMap - Maps a section to its index. /// \param RevGroupMap - Maps a signature symbol to the group section. - /// \param NumRegularSections - Number of non-relocation sections. void computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, - const RevGroupMapTy &RevGroupMap, - unsigned NumRegularSections); + const RevGroupMapTy &RevGroupMap); - void computeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap); + void computeIndexMap(MCAssembler &Asm, SectionIndexMapTy &SectionIndexMap); MCSectionData *createRelocationSection(MCAssembler &Asm, const MCSectionData &SD); void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout); - void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, - const RelMapTy &RelMap); + void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout); void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, SectionIndexMapTy &SectionIndexMap); @@ -279,23 +251,18 @@ class ELFObjectWriter : public MCObjectWriter { // Create the sections that show up in the symbol table. Currently // those are the .note.GNU-stack section and the group sections. 
void createIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, - GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap); + GroupMapTy &GroupMap, RevGroupMapTy &RevGroupMap, + SectionIndexMapTy &SectionIndexMap); void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; - void writeSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap, + void writeSectionHeader(ArrayRef<const MCSectionELF *> Sections, + MCAssembler &Asm, const GroupMapTy &GroupMap, const MCAsmLayout &Layout, const SectionIndexMapTy &SectionIndexMap, - const RelMapTy &RelMap, const SectionOffsetMapTy &SectionOffsetMap); - void ComputeSectionOrder(MCAssembler &Asm, - std::vector<const MCSectionELF*> &Sections); - void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, uint64_t Address, uint64_t Offset, uint64_t Size, uint32_t Link, uint32_t Info, @@ -308,6 +275,7 @@ class ELFObjectWriter : public MCObjectWriter { bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, bool IsPCRel) const override; @@ -317,7 +285,6 @@ class ELFObjectWriter : public MCObjectWriter { void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; void writeSection(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, - const RelMapTy &RelMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, uint64_t Alignment, const MCSectionELF &Section); @@ -384,8 +351,6 @@ void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value, uint16_t Index = LargeIndex ? uint16_t(ELF::SHN_XINDEX) : shndx; - raw_svector_ostream OS(SymtabF->getContents()); - if (Is64Bit) { write(*SymtabF, name); // st_name write(*SymtabF, info); // st_info @@ -438,7 +403,6 @@ ELFObjectWriter::~ELFObjectWriter() // Emit the ELF header. void ELFObjectWriter::WriteHeader(const MCAssembler &Asm, - uint64_t SectionDataSize, unsigned NumberOfSections) { // ELF Header // ---------- @@ -472,8 +436,7 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm, Write32(ELF::EV_CURRENT); // e_version WriteWord(0); // e_entry, no entry point in .o file WriteWord(0); // e_phoff, no program header for .o - WriteWord(SectionDataSize + (is64Bit() ? 
sizeof(ELF::Elf64_Ehdr) : - sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes + WriteWord(0); // e_shoff = sec hdr table off in bytes // e_flags = whatever the target wants Write32(Asm.getELFHeaderEFlags()); @@ -628,7 +591,7 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, if (ESize) { int64_t Res; - if (!ESize->EvaluateAsAbsolute(Res, Layout)) + if (!ESize->evaluateKnownAbsolute(Res, Layout)) report_fatal_error("Size expression must be absolute."); Size = Res; } @@ -969,8 +932,7 @@ bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) { } void ELFObjectWriter::computeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap) { + SectionIndexMapTy &SectionIndexMap) { unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { @@ -994,17 +956,15 @@ void ELFObjectWriter::computeIndexMap(MCAssembler &Asm, if (MCSectionData *RelSD = createRelocationSection(Asm, SD)) { const MCSectionELF *RelSection = static_cast<const MCSectionELF *>(&RelSD->getSection()); - RelMap[RelSection] = &Section; SectionIndexMap[RelSection] = Index++; } } } -void -ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap, - const RevGroupMapTy &RevGroupMap, - unsigned NumRegularSections) { +void ELFObjectWriter::computeSymbolTable( + MCAssembler &Asm, const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, + const RevGroupMapTy &RevGroupMap) { // FIXME: Is this the correct place to do this? // FIXME: Why is an undefined reference to _GLOBAL_OFFSET_TABLE_ needed? if (NeedsGOT) { @@ -1167,15 +1127,12 @@ ELFObjectWriter::createRelocationSection(MCAssembler &Asm, EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); unsigned Flags = 0; - StringRef Group = ""; - if (Section.getFlags() & ELF::SHF_GROUP) { + if (Section.getFlags() & ELF::SHF_GROUP) Flags = ELF::SHF_GROUP; - Group = Section.getGroup()->getName(); - } - const MCSectionELF *RelaSection = Ctx.getELFSection( + const MCSectionELF *RelaSection = Ctx.createELFRelSection( RelaSectionName, hasRelocationAddend() ? ELF::SHT_RELA : ELF::SHT_REL, - Flags, EntrySize, Group, true); + Flags, EntrySize, Section.getGroup(), &Section); return &Asm.getOrCreateSectionData(*RelaSection); } @@ -1324,8 +1281,7 @@ void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm, } } -void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, - const RelMapTy &RelMap) { +void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) { for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { MCSectionData &RelSD = *it; const MCSectionELF &RelSection = @@ -1335,7 +1291,7 @@ void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) continue; - const MCSectionELF *Section = RelMap.lookup(&RelSection); + const MCSectionELF *Section = RelSection.getAssociatedSection(); MCSectionData &SD = Asm.getOrCreateSectionData(*Section); RelSD.setAlignment(is64Bit() ? 8 : 4); @@ -1362,31 +1318,14 @@ void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, WriteWord(EntrySize); // sh_entsize } -// ELF doesn't require relocations to be in any order. We sort by the r_offset, -// just to match gnu as for easier comparison. The use type is an arbitrary way -// of making the sort deterministic. 
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { - const ELFRelocationEntry &A = *AP; - const ELFRelocationEntry &B = *BP; - if (A.Offset != B.Offset) - return B.Offset - A.Offset; - if (B.Type != A.Type) - return A.Type - B.Type; - //llvm_unreachable("ELFRelocs might be unstable!"); - return 0; -} - -static void sortRelocs(const MCAssembler &Asm, - std::vector<ELFRelocationEntry> &Relocs) { - array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); -} - void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F, const MCSectionData *SD) { std::vector<ELFRelocationEntry> &Relocs = Relocations[SD]; - sortRelocs(Asm, Relocs); + // Sort the relocation entries. Most targets just sort by Offset, but some + // (e.g., MIPS) have additional constraints. + TargetObjectWriter->sortRelocs(Asm, Relocs); for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const ELFRelocationEntry &Entry = Relocs[e - i - 1]; @@ -1473,12 +1412,9 @@ void ELFObjectWriter::CreateMetadataSections( ShStrTabBuilder.data().end()); } -void ELFObjectWriter::createIndexedSections(MCAssembler &Asm, - MCAsmLayout &Layout, - GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap, - SectionIndexMapTy &SectionIndexMap, - RelMapTy &RelMap) { +void ELFObjectWriter::createIndexedSections( + MCAssembler &Asm, MCAsmLayout &Layout, GroupMapTy &GroupMap, + RevGroupMapTy &RevGroupMap, SectionIndexMapTy &SectionIndexMap) { MCContext &Ctx = Asm.getContext(); // Build the groups @@ -1502,7 +1438,7 @@ void ELFObjectWriter::createIndexedSections(MCAssembler &Asm, GroupMap[Group] = SignatureSymbol; } - computeIndexMap(Asm, SectionIndexMap, RelMap); + computeIndexMap(Asm, SectionIndexMap); // Add sections to the groups for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); @@ -1522,7 +1458,6 @@ void ELFObjectWriter::createIndexedSections(MCAssembler &Asm, void ELFObjectWriter::writeSection(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, - const RelMapTy &RelMap, uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size, uint64_t Alignment, @@ -1531,16 +1466,19 @@ void ELFObjectWriter::writeSection(MCAssembler &Asm, uint64_t sh_info = 0; switch(Section.getType()) { + default: + // Nothing to do. + break; + case ELF::SHT_DYNAMIC: sh_link = ShStrTabBuilder.getOffset(Section.getSectionName()); - sh_info = 0; break; case ELF::SHT_REL: case ELF::SHT_RELA: { sh_link = SymbolTableIndex; assert(sh_link && ".symtab not found"); - const MCSectionELF *InfoSection = RelMap.find(&Section)->second; + const MCSectionELF *InfoSection = Section.getAssociatedSection(); sh_info = SectionIndexMap.lookup(InfoSection); break; } @@ -1555,45 +1493,15 @@ void ELFObjectWriter::writeSection(MCAssembler &Asm, sh_link = SymbolTableIndex; break; - case ELF::SHT_PROGBITS: - case ELF::SHT_STRTAB: - case ELF::SHT_NOBITS: - case ELF::SHT_NOTE: - case ELF::SHT_NULL: - case ELF::SHT_ARM_ATTRIBUTES: - case ELF::SHT_INIT_ARRAY: - case ELF::SHT_FINI_ARRAY: - case ELF::SHT_PREINIT_ARRAY: - case ELF::SHT_X86_64_UNWIND: - case ELF::SHT_MIPS_REGINFO: - case ELF::SHT_MIPS_OPTIONS: - case ELF::SHT_MIPS_ABIFLAGS: - // Nothing to do. 
- break; - case ELF::SHT_GROUP: sh_link = SymbolTableIndex; sh_info = GroupSymbolIndex; break; - - default: - llvm_unreachable("FIXME: sh_type value not supported!"); } if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && - Section.getType() == ELF::SHT_ARM_EXIDX) { - StringRef SecName(Section.getSectionName()); - if (SecName == ".ARM.exidx") { - sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection( - ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC)); - } else if (SecName.startswith(".ARM.exidx")) { - StringRef GroupName = - Section.getGroup() ? Section.getGroup()->getName() : ""; - sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection( - SecName.substr(sizeof(".ARM.exidx") - 1), ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, 0, GroupName)); - } - } + Section.getType() == ELF::SHT_ARM_EXIDX) + sh_link = SectionIndexMap.lookup(Section.getAssociatedSection()); WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()), Section.getType(), @@ -1617,13 +1525,6 @@ uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) { return Ret; } -uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout, - const MCSectionData &SD) { - if (IsELFMetaDataSection(SD)) - return DataSectionSize(SD); - return Layout.getSectionFileSize(&SD); -} - uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout, const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) @@ -1631,14 +1532,9 @@ uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout, return Layout.getSectionAddressSize(&SD); } -void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, +void ELFObjectWriter::writeDataSectionData(MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSectionELF &Section) { - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - - uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment()); - WriteZeros(Padding); - + const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) { for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; ++i) { @@ -1652,28 +1548,20 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, } void ELFObjectWriter::writeSectionHeader( - MCAssembler &Asm, const GroupMapTy &GroupMap, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap, const RelMapTy &RelMap, + ArrayRef<const MCSectionELF *> Sections, MCAssembler &Asm, + const GroupMapTy &GroupMap, const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, const SectionOffsetMapTy &SectionOffsetMap) { - const unsigned NumSections = Asm.size() + 1; - - std::vector<const MCSectionELF*> Sections; - Sections.resize(NumSections - 1); - - for (SectionIndexMapTy::const_iterator i= - SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) { - const std::pair<const MCSectionELF*, uint32_t> &p = *i; - Sections[p.second - 1] = p.first; - } + const unsigned NumSections = Asm.size(); // Null section first. uint64_t FirstSectionSize = - NumSections >= ELF::SHN_LORESERVE ? NumSections : 0; + (NumSections + 1) >= ELF::SHN_LORESERVE ? NumSections + 1 : 0; uint32_t FirstSectionLink = ShstrtabIndex >= ELF::SHN_LORESERVE ? 
ShstrtabIndex : 0; WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0); - for (unsigned i = 0; i < NumSections - 1; ++i) { + for (unsigned i = 0; i < NumSections; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); uint32_t GroupSymbolIndex; @@ -1685,39 +1573,9 @@ void ELFObjectWriter::writeSectionHeader( uint64_t Size = GetSectionAddressSize(Layout, SD); - writeSection(Asm, SectionIndexMap, RelMap, GroupSymbolIndex, - SectionOffsetMap.lookup(&Section), Size, - SD.getAlignment(), Section); - } -} - -void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm, - std::vector<const MCSectionELF*> &Sections) { - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() == ELF::SHT_GROUP) - Sections.push_back(&Section); - } - - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() != ELF::SHT_GROUP && - Section.getType() != ELF::SHT_REL && - Section.getType() != ELF::SHT_RELA) - Sections.push_back(&Section); - } - - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast<const MCSectionELF &>(it->getSection()); - if (Section.getType() == ELF::SHT_REL || - Section.getType() == ELF::SHT_RELA) - Sections.push_back(&Section); + writeSection(Asm, SectionIndexMap, GroupSymbolIndex, + SectionOffsetMap.lookup(&Section), Size, SD.getAlignment(), + Section); } } @@ -1727,102 +1585,77 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, RevGroupMapTy RevGroupMap; SectionIndexMapTy SectionIndexMap; - unsigned NumUserSections = Asm.size(); - CompressDebugSections(Asm, const_cast<MCAsmLayout &>(Layout)); - - DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap; - const unsigned NumUserAndRelocSections = Asm.size(); - createIndexedSections(Asm, const_cast<MCAsmLayout&>(Layout), GroupMap, - RevGroupMap, SectionIndexMap, RelMap); - const unsigned AllSections = Asm.size(); - const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections; - - unsigned NumRegularSections = NumUserSections + NumIndexedSections; + createIndexedSections(Asm, const_cast<MCAsmLayout &>(Layout), GroupMap, + RevGroupMap, SectionIndexMap); // Compute symbol table information. - computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap, - NumRegularSections); + computeSymbolTable(Asm, Layout, SectionIndexMap, RevGroupMap); - WriteRelocations(Asm, const_cast<MCAsmLayout&>(Layout), RelMap); + WriteRelocations(Asm, const_cast<MCAsmLayout &>(Layout)); CreateMetadataSections(const_cast<MCAssembler&>(Asm), const_cast<MCAsmLayout&>(Layout), SectionIndexMap); - uint64_t NaturalAlignment = is64Bit() ? 8 : 4; - uint64_t HeaderSize = is64Bit() ? 
sizeof(ELF::Elf64_Ehdr) : - sizeof(ELF::Elf32_Ehdr); - uint64_t FileOff = HeaderSize; - + unsigned NumSections = Asm.size(); std::vector<const MCSectionELF*> Sections; - ComputeSectionOrder(Asm, Sections); - unsigned NumSections = Sections.size(); - SectionOffsetMapTy SectionOffsetMap; - for (unsigned i = 0; i < NumRegularSections + 1; ++i) { - const MCSectionELF &Section = *Sections[i]; - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + Sections.resize(NumSections); - FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); - - // Remember the offset into the file for this section. - SectionOffsetMap[&Section] = FileOff; + for (auto &Pair : SectionIndexMap) + Sections[Pair.second - 1] = Pair.first; - // Get the size of the section in the output file (including padding). - FileOff += GetSectionFileSize(Layout, SD); - } - - FileOff = RoundUpToAlignment(FileOff, NaturalAlignment); - - const unsigned SectionHeaderOffset = FileOff - HeaderSize; + SectionOffsetMapTy SectionOffsetMap; - uint64_t SectionHeaderEntrySize = is64Bit() ? - sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr); - FileOff += (NumSections + 1) * SectionHeaderEntrySize; + // Write out the ELF header ... + WriteHeader(Asm, NumSections + 1); - for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) { + // ... then the sections ... + for (unsigned i = 0; i < NumSections; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - - FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); + uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment()); + WriteZeros(Padding); // Remember the offset into the file for this section. - SectionOffsetMap[&Section] = FileOff; + SectionOffsetMap[&Section] = OS.tell(); - // Get the size of the section in the output file (including padding). - FileOff += GetSectionFileSize(Layout, SD); + writeDataSectionData(Asm, Layout, SD); } - // Write out the ELF header ... - WriteHeader(Asm, SectionHeaderOffset, NumSections + 1); - - // ... then the regular sections ... - // + because of .shstrtab - for (unsigned i = 0; i < NumRegularSections + 1; ++i) - WriteDataSectionData(Asm, Layout, *Sections[i]); - + uint64_t NaturalAlignment = is64Bit() ? 8 : 4; uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment); WriteZeros(Padding); + const unsigned SectionHeaderOffset = OS.tell(); + // ... then the section header table ... - writeSectionHeader(Asm, GroupMap, Layout, SectionIndexMap, RelMap, + writeSectionHeader(Sections, Asm, GroupMap, Layout, SectionIndexMap, SectionOffsetMap); - // ... and then the remaining sections ... 
- for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) - WriteDataSectionData(Asm, Layout, *Sections[i]); + if (is64Bit()) { + uint64_t Val = SectionHeaderOffset; + if (sys::IsLittleEndianHost != IsLittleEndian) + sys::swapByteOrder(Val); + OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val), + offsetof(ELF::Elf64_Ehdr, e_shoff)); + } else { + uint32_t Val = SectionHeaderOffset; + if (sys::IsLittleEndianHost != IsLittleEndian) + sys::swapByteOrder(Val); + OS.pwrite(reinterpret_cast<char *>(&Val), sizeof(Val), + offsetof(ELF::Elf32_Ehdr, e_shoff)); + } } -bool -ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, - const MCSymbolData &DataA, - const MCFragment &FB, - bool InSet, - bool IsPCRel) const { - if (::isWeak(DataA)) +bool ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, + bool IsPCRel) const { + if (!InSet && (::isWeak(DataA) || (DataB && ::isWeak(*DataB)))) return false; return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( - Asm, DataA, FB,InSet, IsPCRel); + Asm, DataA, DataB, FB, InSet, IsPCRel); } bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const { @@ -1830,7 +1663,7 @@ bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const { } MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &OS, + raw_pwrite_stream &OS, bool IsLittleEndian) { return new ELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 62f5279..144d355 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -36,11 +36,10 @@ using namespace llvm; namespace { -class MCAsmStreamer : public MCStreamer { -protected: +class MCAsmStreamer final : public MCStreamer { + std::unique_ptr<formatted_raw_ostream> OSOwner; formatted_raw_ostream &OS; const MCAsmInfo *MAI; -private: std::unique_ptr<MCInstPrinter> InstPrinter; std::unique_ptr<MCCodeEmitter> Emitter; std::unique_ptr<MCAsmBackend> AsmBackend; @@ -57,14 +56,15 @@ private: void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; public: - MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, + MCAsmStreamer(MCContext &Context, std::unique_ptr<formatted_raw_ostream> os, bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *printer, MCCodeEmitter *emitter, MCAsmBackend *asmbackend, bool showInst) - : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), - InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), - CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), - ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) { + : MCStreamer(Context), OSOwner(std::move(os)), OS(*OSOwner), + MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), + AsmBackend(asmbackend), CommentStream(CommentToEmit), + IsVerboseAsm(isVerboseAsm), ShowInst(showInst), + UseDwarfDirectory(useDwarfDirectory) { if (InstPrinter && IsVerboseAsm) InstPrinter->setCommentStream(CommentStream); } @@ -1262,7 +1262,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &S // If we have an AsmPrinter, use that to print, otherwise print the MCInst. 
if (InstPrinter) - InstPrinter->printInst(&Inst, OS, ""); + InstPrinter->printInst(&Inst, OS, "", STI); else Inst.print(OS); EmitEOL(); @@ -1314,10 +1314,10 @@ void MCAsmStreamer::FinishImpl() { } MCStreamer *llvm::createAsmStreamer(MCContext &Context, - formatted_raw_ostream &OS, + std::unique_ptr<formatted_raw_ostream> OS, bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *IP, MCCodeEmitter *CE, MCAsmBackend *MAB, bool ShowInst) { - return new MCAsmStreamer(Context, OS, isVerboseAsm, useDwarfDirectory, IP, CE, - MAB, ShowInst); + return new MCAsmStreamer(Context, std::move(OS), isVerboseAsm, + useDwarfDirectory, IP, CE, MAB, ShowInst); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 857eafc..d09e383 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -229,8 +229,9 @@ uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { return getSectionAddressSize(SD); } -uint64_t MCAsmLayout::computeBundlePadding(const MCFragment *F, - uint64_t FOffset, uint64_t FSize) { +uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler, + const MCFragment *F, + uint64_t FOffset, uint64_t FSize) { uint64_t BundleSize = Assembler.getBundleAlignSize(); assert(BundleSize > 0 && "computeBundlePadding should only be called if bundling is enabled"); @@ -332,6 +333,7 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) { getFragmentList().insert(IP, F); F->setParent(this); } + return IP; } @@ -497,14 +499,12 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, } else { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol &SA = A->getSymbol(); - if (A->getKind() != MCSymbolRefExpr::VK_None || - SA.AliasedSymbol().isUndefined()) { + if (A->getKind() != MCSymbolRefExpr::VK_None || SA.isUndefined()) { IsResolved = false; } else { const MCSymbolData &DataA = getSymbolData(SA); - IsResolved = - getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA, - *DF, false, true); + IsResolved = getWriter().IsSymbolRefDifferenceFullyResolvedImpl( + *this, DataA, nullptr, *DF, false, true); } } } else { @@ -514,12 +514,12 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, Value = Target.getConstant(); if (const MCSymbolRefExpr *A = Target.getSymA()) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); if (Sym.isDefined()) Value += Layout.getSymbolOffset(&getSymbolData(Sym)); } if (const MCSymbolRefExpr *B = Target.getSymB()) { - const MCSymbol &Sym = B->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = B->getSymbol(); if (Sym.isDefined()) Value -= Layout.getSymbolOffset(&getSymbolData(Sym)); } @@ -634,7 +634,12 @@ void MCAsmLayout::layoutFragment(MCFragment *F) { // The fragment's offset will point to after the padding, and its computed // size won't include the padding. // - if (Assembler.isBundlingEnabled() && F->hasInstructions()) { + // When the -mc-relax-all flag is used, we optimize bundling by writing the + // bundle padding directly into fragments when the instructions are emitted + // inside the streamer.
+ // + if (Assembler.isBundlingEnabled() && !Assembler.getRelaxAll() && + F->hasInstructions()) { assert(isa<MCEncodedFragment>(F) && "Only MCEncodedFragment implementations have instructions"); uint64_t FSize = Assembler.computeFragmentSize(*this, *F); @@ -642,7 +647,8 @@ void MCAsmLayout::layoutFragment(MCFragment *F) { if (FSize > Assembler.getBundleAlignSize()) report_fatal_error("Fragment can't be larger than a bundle size"); - uint64_t RequiredBundlePadding = computeBundlePadding(F, F->Offset, FSize); + uint64_t RequiredBundlePadding = computeBundlePadding(Assembler, F, + F->Offset, FSize); if (RequiredBundlePadding > UINT8_MAX) report_fatal_error("Padding cannot exceed 255 bytes"); F->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding)); @@ -657,24 +663,18 @@ static void writeFragmentContents(const MCFragment &F, MCObjectWriter *OW) { OW->WriteBytes(EF.getContents()); } -/// \brief Write the fragment \p F to the output file. -static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment &F) { - MCObjectWriter *OW = &Asm.getWriter(); - - // FIXME: Embed in fragments instead? - uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); - +void MCAssembler::writeFragmentPadding(const MCFragment &F, uint64_t FSize, + MCObjectWriter *OW) const { // Should NOP padding be written out before this fragment? unsigned BundlePadding = F.getBundlePadding(); if (BundlePadding > 0) { - assert(Asm.isBundlingEnabled() && + assert(isBundlingEnabled() && "Writing bundle padding with disabled bundling"); assert(F.hasInstructions() && "Writing bundle padding for a fragment without instructions"); - unsigned TotalLength = BundlePadding + static_cast<unsigned>(FragmentSize); - if (F.alignToBundleEnd() && TotalLength > Asm.getBundleAlignSize()) { + unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize); + if (F.alignToBundleEnd() && TotalLength > getBundleAlignSize()) { // If the padding itself crosses a bundle boundary, it must be emitted // in 2 pieces, since even nop instructions must not cross boundaries. // v--------------v <- BundleAlignSize @@ -683,16 +683,27 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, // | Prev |####|####| F | // ---------------------------- // ^-------------------^ <- TotalLength - unsigned DistanceToBoundary = TotalLength - Asm.getBundleAlignSize(); - if (!Asm.getBackend().writeNopData(DistanceToBoundary, OW)) + unsigned DistanceToBoundary = TotalLength - getBundleAlignSize(); + if (!getBackend().writeNopData(DistanceToBoundary, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(DistanceToBoundary) + " bytes"); BundlePadding -= DistanceToBoundary; } - if (!Asm.getBackend().writeNopData(BundlePadding, OW)) + if (!getBackend().writeNopData(BundlePadding, OW)) report_fatal_error("unable to write NOP sequence of " + Twine(BundlePadding) + " bytes"); } +} + +/// \brief Write the fragment \p F to the output file. +static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment &F) { + MCObjectWriter *OW = &Asm.getWriter(); + + // FIXME: Embed in fragments instead? + uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); + + Asm.writeFragmentPadding(F, FragmentSize, OW); // This variable (and its dummy usage) is to participate in the assert at // the end of the function. 
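The bundle-padding computation that the MCAssembler.cpp hunks above turn into a free function (so the streamer can call it too) is easier to follow in isolation. The following is a minimal standalone sketch of that rule, not LLVM's API: the name sketchComputeBundlePadding, the AlignToBundleEnd flag, and the example values are illustrative assumptions, and the sketch presumes (as the surrounding code enforces) that a fragment is never larger than the bundle size.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Sketch only: mirrors the padding rule, not LLVM's signatures.
static uint64_t sketchComputeBundlePadding(uint64_t BundleSize, uint64_t FOffset,
                                           uint64_t FSize, bool AlignToBundleEnd) {
  assert(BundleSize > 0 && (BundleSize & (BundleSize - 1)) == 0 &&
         "bundle size must be a power of 2");
  uint64_t BundleMask = BundleSize - 1;
  uint64_t OffsetInBundle = FOffset & BundleMask;
  uint64_t EndOfFragment = OffsetInBundle + FSize;

  if (AlignToBundleEnd) {
    // Pad so the fragment ends exactly on a bundle boundary.
    return EndOfFragment == BundleSize
               ? 0
               : BundleSize - (EndOfFragment & BundleMask);
  }
  // Otherwise pad only if the fragment would straddle a boundary.
  return EndOfFragment > BundleSize ? BundleSize - OffsetInBundle : 0;
}

int main() {
  // A 5-byte fragment at offset 14 with 16-byte bundles would straddle the
  // boundary at 16, so it needs 2 bytes of padding to start at offset 16.
  printf("%llu\n",
         (unsigned long long)sketchComputeBundlePadding(16, 14, 5, false));
  return 0;
}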
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 3cb3ea1..5f8e3c1 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -139,6 +139,11 @@ MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName, "$frame_escape_" + Twine(Idx)); } +MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(StringRef FuncName) { + return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName + + "$parent_frame_offset"); +} + MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) { // Determine whether this is an assembler temporary or normal label, if used. bool IsTemporary = false; @@ -257,41 +262,63 @@ MCContext::getMachOSection(StringRef Segment, StringRef Section, Reserved2, Kind, Begin); } -const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, - unsigned Flags, - const char *BeginSymName) { - return getELFSection(Section, Type, Flags, 0, "", BeginSymName); -} - void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) { StringRef GroupName; if (const MCSymbol *Group = Section->getGroup()) GroupName = Group->getName(); - ELFUniquingMap.erase(SectionGroupPair(Section->getSectionName(), GroupName)); - auto I = - ELFUniquingMap.insert(std::make_pair(SectionGroupPair(Name, GroupName), - Section)).first; - StringRef CachedName = I->first.first; + unsigned UniqueID = Section->getUniqueID(); + ELFUniquingMap.erase( + ELFSectionKey{Section->getSectionName(), GroupName, UniqueID}); + auto I = ELFUniquingMap.insert(std::make_pair( + ELFSectionKey{Name, GroupName, UniqueID}, + Section)).first; + StringRef CachedName = I->first.SectionName; const_cast<MCSectionELF*>(Section)->setSectionName(CachedName); } +const MCSectionELF * +MCContext::createELFRelSection(StringRef Name, unsigned Type, unsigned Flags, + unsigned EntrySize, const MCSymbol *Group, + const MCSectionELF *Associated) { + StringMap<bool>::iterator I; + bool Inserted; + std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true)); + + return new (*this) + MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(), + EntrySize, Group, true, nullptr, Associated); +} + const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, unsigned Flags, unsigned EntrySize, - StringRef Group, bool Unique, + StringRef Group, unsigned UniqueID, const char *BeginSymName) { + MCSymbol *GroupSym = nullptr; + if (!Group.empty()) + GroupSym = GetOrCreateSymbol(Group); + + return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID, + BeginSymName, nullptr); +} + +const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, + unsigned Flags, unsigned EntrySize, + const MCSymbol *GroupSym, + unsigned UniqueID, + const char *BeginSymName, + const MCSectionELF *Associated) { + StringRef Group = ""; + if (GroupSym) + Group = GroupSym->getName(); // Do the lookup, if we have a hit, return it. 
auto IterBool = ELFUniquingMap.insert( - std::make_pair(SectionGroupPair(Section, Group), nullptr)); + std::make_pair(ELFSectionKey{Section, Group, UniqueID}, nullptr)); auto &Entry = *IterBool.first; - if (!IterBool.second && !Unique) + if (!IterBool.second) return Entry.second; - MCSymbol *GroupSym = nullptr; - if (!Group.empty()) - GroupSym = GetOrCreateSymbol(Group); - - StringRef CachedName = Entry.first.first; + StringRef CachedName = Entry.first.SectionName; SectionKind Kind; if (Flags & ELF::SHF_EXECINSTR) @@ -303,25 +330,17 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); - MCSectionELF *Result = new (*this) MCSectionELF( - CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique, Begin); - if (!Unique) - Entry.second = Result; + MCSectionELF *Result = + new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, + GroupSym, UniqueID, Begin, Associated); + Entry.second = Result; return Result; } -const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, - unsigned Flags, unsigned EntrySize, - StringRef Group, - const char *BeginSymName) { - return getELFSection(Section, Type, Flags, EntrySize, Group, false, - BeginSymName); -} - const MCSectionELF *MCContext::CreateELFGroupSection() { MCSectionELF *Result = new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4, - nullptr, false, nullptr); + nullptr, ~0, nullptr, nullptr); return Result; } @@ -329,23 +348,24 @@ const MCSectionCOFF * MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, SectionKind Kind, StringRef COMDATSymName, int Selection, const char *BeginSymName) { - // Do the lookup, if we have a hit, return it. + MCSymbol *COMDATSymbol = nullptr; + if (!COMDATSymName.empty()) { + COMDATSymbol = GetOrCreateSymbol(COMDATSymName); + COMDATSymName = COMDATSymbol->getName(); + } - SectionGroupTriple T(Section, COMDATSymName, Selection); + // Do the lookup, if we have a hit, return it. + COFFSectionKey T{Section, COMDATSymName, Selection}; auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr)); auto Iter = IterBool.first; if (!IterBool.second) return Iter->second; - MCSymbol *COMDATSymbol = nullptr; - if (!COMDATSymName.empty()) - COMDATSymbol = GetOrCreateSymbol(COMDATSymName); - MCSymbol *Begin = nullptr; if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); - StringRef CachedName = std::get<0>(Iter->first); + StringRef CachedName = Iter->first.SectionName; MCSectionCOFF *Result = new (*this) MCSectionCOFF( CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin); @@ -361,7 +381,7 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, } const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) { - SectionGroupTriple T(Section, "", 0); + COFFSectionKey T{Section, "", 0}; auto Iter = COFFUniquingMap.find(T); if (Iter == COFFUniquingMap.end()) return nullptr; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index d9f01d0..716d76a 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -33,22 +33,22 @@ using namespace llvm; // disassembler context. If not, it returns NULL. 
// LLVMDisasmContextRef -LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, +LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU, const char *Features, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) { // Get the target. std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); + const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); if (!TheTarget) return nullptr; - const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TT); if (!MRI) return nullptr; // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TT); if (!MAI) return nullptr; @@ -56,8 +56,8 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, if (!MII) return nullptr; - const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU, - Features); + const MCSubtargetInfo *STI = + TheTarget->createMCSubtargetInfo(TT, CPU, Features); if (!STI) return nullptr; @@ -72,25 +72,24 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, return nullptr; std::unique_ptr<MCRelocationInfo> RelInfo( - TheTarget->createMCRelocationInfo(Triple, *Ctx)); + TheTarget->createMCRelocationInfo(TT, *Ctx)); if (!RelInfo) return nullptr; std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer( - Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo))); + TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx, std::move(RelInfo))); DisAsm->setSymbolizer(std::move(Symbolizer)); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); - MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, - *MAI, *MII, *MRI, *STI); + MCInstPrinter *IP = TheTarget->createMCInstPrinter( + Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI); if (!IP) return nullptr; - LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType, - GetOpInfo, SymbolLookUp, - TheTarget, MAI, MRI, - STI, MII, Ctx, DisAsm, IP); + LLVMDisasmContext *DC = + new LLVMDisasmContext(TT, DisInfo, TagType, GetOpInfo, SymbolLookUp, + TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP); if (!DC) return nullptr; @@ -98,19 +97,19 @@ LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, return DC; } -LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, - void *DisInfo, int TagType, - LLVMOpInfoCallback GetOpInfo, - LLVMSymbolLookupCallback SymbolLookUp){ - return LLVMCreateDisasmCPUFeatures(Triple, CPU, "", DisInfo, TagType, - GetOpInfo, SymbolLookUp); +LLVMDisasmContextRef +LLVMCreateDisasmCPU(const char *TT, const char *CPU, void *DisInfo, int TagType, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp) { + return LLVMCreateDisasmCPUFeatures(TT, CPU, "", DisInfo, TagType, GetOpInfo, + SymbolLookUp); } -LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo, +LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) { - return LLVMCreateDisasmCPUFeatures(Triple, "", "", DisInfo, TagType, - GetOpInfo, SymbolLookUp); + return LLVMCreateDisasmCPUFeatures(TT, "", "", DisInfo, TagType, GetOpInfo, + SymbolLookUp); } // @@ -268,7 +267,7 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, SmallVector<char, 64> InsnStr; 
raw_svector_ostream OS(InsnStr); formatted_raw_ostream FormattedOS(OS); - IP->printInst(&Inst, FormattedOS, AnnotationsStr); + IP->printInst(&Inst, FormattedOS, AnnotationsStr, *DC->getSubtargetInfo()); if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency) emitLatency(DC, Inst); @@ -312,11 +311,10 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){ const MCAsmInfo *MAI = DC->getAsmInfo(); const MCInstrInfo *MII = DC->getInstrInfo(); const MCRegisterInfo *MRI = DC->getRegisterInfo(); - const MCSubtargetInfo *STI = DC->getSubtargetInfo(); int AsmPrinterVariant = MAI->getAssemblerDialect(); AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0; MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter( - AsmPrinterVariant, *MAI, *MII, *MRI, *STI); + Triple(DC->getTripleName()), AsmPrinterVariant, *MAI, *MII, *MRI); if (IP) { DC->setIP(IP); DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 87e7ed1..e9f685e 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -803,7 +803,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); - for (const auto sec : Sections) { + for (const auto &sec : Sections) { MCSymbol *StartSymbol = sec.second.first; MCSymbol *EndSymbol = sec.second.second; diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 84176dc..dc3d6c3 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -28,3 +28,24 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { return false; } + +// ELF doesn't require relocations to be in any order. We sort by the Offset, +// just to match gnu as for easier comparison. The use type is an arbitrary way +// of making the sort deterministic. 
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { + const ELFRelocationEntry &A = *AP; + const ELFRelocationEntry &B = *BP; + if (A.Offset != B.Offset) + return B.Offset - A.Offset; + if (B.Type != A.Type) + return A.Type - B.Type; + //llvm_unreachable("ELFRelocs might be unstable!"); + return 0; +} + + +void +MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); +} diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index cdf5033..aa05390 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -40,6 +41,48 @@ using namespace llvm; MCELFStreamer::~MCELFStreamer() { } +void MCELFStreamer::mergeFragment(MCDataFragment *DF, + MCEncodedFragmentWithFixups *EF) { + MCAssembler &Assembler = getAssembler(); + + if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { + uint64_t FSize = EF->getContents().size(); + + if (FSize > Assembler.getBundleAlignSize()) + report_fatal_error("Fragment can't be larger than a bundle size"); + + uint64_t RequiredBundlePadding = computeBundlePadding( + Assembler, EF, DF->getContents().size(), FSize); + + if (RequiredBundlePadding > UINT8_MAX) + report_fatal_error("Padding cannot exceed 255 bytes"); + + if (RequiredBundlePadding > 0) { + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS); + + EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding)); + + Assembler.writeFragmentPadding(*EF, FSize, OW); + VecOS.flush(); + delete OW; + + DF->getContents().append(Code.begin(), Code.end()); + } + } + + flushPendingLabels(DF, DF->getContents().size()); + + for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) { + EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() + + DF->getContents().size()); + DF->getFixups().push_back(EF->getFixups()[i]); + } + DF->setHasInstructions(true); + DF->getContents().append(EF->getContents().begin(), EF->getContents().end()); +} + void MCELFStreamer::InitSections(bool NoExecStack) { // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. @@ -449,7 +492,16 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst, if (Assembler.isBundlingEnabled()) { MCSectionData *SD = getCurrentSectionData(); - if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) + if (Assembler.getRelaxAll() && SD->isBundleLocked()) + // If the -mc-relax-all flag is used and we are bundle-locked, we re-use + // the current bundle group. + DF = BundleGroups.back(); + else if (Assembler.getRelaxAll() && !SD->isBundleLocked()) + // When not in a bundle-locked group and the -mc-relax-all flag is used, + // we create a new temporary fragment which will be later merged into + // the current fragment. + DF = new MCDataFragment(); + else if (SD->isBundleLocked() && !SD->isBundleGroupBeforeFirstInst()) // If we are bundle-locked, we re-use the current fragment. // The bundle-locking directive ensures this is a new data fragment. 
DF = cast<MCDataFragment>(getCurrentFragment()); @@ -487,6 +539,14 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst, } DF->setHasInstructions(true); DF->getContents().append(Code.begin(), Code.end()); + + if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { + MCSectionData *SD = getCurrentSectionData(); + if (!SD->isBundleLocked()) { + mergeFragment(getOrCreateDataFragment(), DF); + delete DF; + } + } } void MCELFStreamer::EmitBundleAlignMode(unsigned AlignPow2) { @@ -510,6 +570,12 @@ void MCELFStreamer::EmitBundleLock(bool AlignToEnd) { if (!SD->isBundleLocked()) SD->setBundleGroupBeforeFirstInst(true); + if (getAssembler().getRelaxAll() && !SD->isBundleLocked()) { + // TODO: drop the lock state and set directly in the fragment + MCDataFragment *DF = new MCDataFragment(); + BundleGroups.push_back(DF); + } + SD->setBundleLockState(AlignToEnd ? MCSectionData::BundleLockedAlignToEnd : MCSectionData::BundleLocked); } @@ -525,7 +591,27 @@ void MCELFStreamer::EmitBundleUnlock() { else if (SD->isBundleGroupBeforeFirstInst()) report_fatal_error("Empty bundle-locked group is forbidden"); - SD->setBundleLockState(MCSectionData::NotBundleLocked); + // When the -mc-relax-all flag is used, we emit instructions to fragments + // stored on a stack. When the bundle unlock is emitted, we pop a fragment + // from the stack and merge it into the one below. + if (getAssembler().getRelaxAll()) { + assert(!BundleGroups.empty() && "There are no bundle groups"); + MCDataFragment *DF = BundleGroups.back(); + + // FIXME: Use BundleGroups to track the lock state instead. + SD->setBundleLockState(MCSectionData::NotBundleLocked); + + // FIXME: Use more separate fragments for nested groups. + if (!SD->isBundleLocked()) { + mergeFragment(getOrCreateDataFragment(), DF); + BundleGroups.pop_back(); + delete DF; + } + + if (SD->getBundleLockState() != MCSectionData::BundleLockedAlignToEnd) + getOrCreateDataFragment()->setAlignToBundleEnd(false); + } else + SD->setBundleLockState(MCSectionData::NotBundleLocked); } void MCELFStreamer::Flush() { @@ -561,7 +647,7 @@ void MCELFStreamer::FinishImpl() { } MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll) { MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); if (RelaxAll) diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8a64403..0702539 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -775,6 +775,10 @@ const MCSection *MCExpr::FindAssociatedSection() const { if (RHS_S == MCSymbol::AbsolutePseudoSection) return LHS_S; + // Not always correct, but probably the best we can do without more context. + if (BE->getOpcode() == MCBinaryExpr::Sub) + return MCSymbol::AbsolutePseudoSection; + // Otherwise, return the first non-null section. return LHS_S ?
LHS_S : RHS_S; } diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index d5c7101..5c78f5f 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -54,7 +54,7 @@ private: void EmitDataRegionEnd(); public: - MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, + MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label) : MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label), DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {} @@ -491,7 +491,7 @@ void MCMachOStreamer::FinishImpl() { } MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll, bool DWARFMustBeAtTheEnd, bool LabelSections) { MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 6aa2de3..d254e95 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -24,18 +24,13 @@ using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter_) + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter_) : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, *Emitter_, *TAB.createObjectWriter(OS), OS)), CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {} -MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter_, - MCAssembler *Assembler) - : MCStreamer(Context), Assembler(Assembler), CurSectionData(nullptr), - EmitEHFrame(true), EmitDebugFrame(false) {} - MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); delete &Assembler->getEmitter(); @@ -43,7 +38,7 @@ MCObjectStreamer::~MCObjectStreamer() { delete Assembler; } -void MCObjectStreamer::flushPendingLabels(MCFragment *F) { +void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) { if (PendingLabels.size()) { if (!F) { F = new MCDataFragment(); @@ -52,7 +47,7 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F) { } for (MCSymbolData *SD : PendingLabels) { SD->setFragment(F); - SD->setOffset(0); + SD->setOffset(FOffset); } PendingLabels.clear(); } @@ -93,7 +88,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) - if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) { + if (!F || (Assembler->isBundlingEnabled() && !Assembler->getRelaxAll() && + F->hasInstructions())) { F = new MCDataFragment(); insert(F); } @@ -149,7 +145,9 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { // If there is a current fragment, mark the symbol as pointing into it. // Otherwise queue the label and set its fragment pointer when we emit the // next fragment. 
- if (auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment())) { + auto *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (F && !(getAssembler().isBundlingEnabled() && + getAssembler().getRelaxAll())) { SD.setFragment(F); SD.setOffset(F->getContents().size()); } else { @@ -248,6 +246,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst, void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &STI) { + if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled()) + llvm_unreachable("All instructions should have already been relaxed"); + // Always create a new, separate fragment here, because its size can change // during relaxation. MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI); diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 3c536ec..e40c07d 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -27,7 +27,7 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved( const MCSymbol &SA = A->getSymbol(); const MCSymbol &SB = B->getSymbol(); - if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined()) + if (SA.isUndefined() || SB.isUndefined()) return false; const MCSymbolData &DataA = Asm.getSymbolData(SA); @@ -35,19 +35,15 @@ bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved( if(!DataA.getFragment() || !DataB.getFragment()) return false; - return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, - *DataB.getFragment(), - InSet, - false); + return IsSymbolRefDifferenceFullyResolvedImpl( + Asm, DataA, &DataB, *DataB.getFragment(), InSet, false); } -bool -MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, - const MCSymbolData &DataA, - const MCFragment &FB, - bool InSet, - bool IsPCRel) const { - const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection(); +bool MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, + bool IsPCRel) const { + const MCSection &SecA = DataA.getSymbol().getSection(); const MCSection &SecB = FB.getParent()->getSection(); // On ELF and COFF A - B is absolute if A and B are in the same section. return &SecA == &SecB; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2bf980b..92a7507 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -175,7 +175,7 @@ private: public: AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); - virtual ~AsmParser(); + ~AsmParser() override; bool Run(bool NoInitialTextSection, bool NoFinalize = false) override; @@ -4606,7 +4606,7 @@ bool AsmParser::parseMSInlineAsm( ++InputIdx; OutputDecls.push_back(OpDecl); OutputDeclsAddressOf.push_back(Operand.needAddressOf()); - OutputConstraints.push_back('=' + Operand.getConstraint().str()); + OutputConstraints.push_back(("=" + Operand.getConstraint()).str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); } else { InputDecls.push_back(OpDecl); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 7a120a1..a19339d 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -379,7 +379,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { const MCExpr *Subsection = nullptr; bool UseLastGroup = false; StringRef UniqueStr; - bool Unique = false; + int64_t UniqueID = ~0; // Set the defaults first. 
if (SectionName == ".fini" || SectionName == ".init" || @@ -470,7 +470,15 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { return TokError("expected identifier in directive"); if (UniqueStr != "unique") return TokError("expected 'unique'"); - Unique = true; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected comma"); + Lex(); + if (getParser().parseAbsoluteExpression(UniqueID)) + return true; + if (UniqueID < 0) + return TokError("unique id must be positive"); + if (!isUInt<32>(UniqueID) || UniqueID == ~0U) + return TokError("unique id is too large"); } } } @@ -520,7 +528,7 @@ EndStmt: } const MCSection *ELFSection = getContext().getELFSection( - SectionName, Type, Flags, Size, GroupName, Unique); + SectionName, Type, Flags, Size, GroupName, UniqueID); getStreamer().SwitchSection(ELFSection, Subsection); if (getContext().getGenDwarfForAssembly()) { diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index da38682..3cd8453 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -24,7 +24,7 @@ MCSectionELF::~MCSectionELF() {} // anchor. bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const { - if (Unique) + if (isUnique()) return false; // FIXME: Does .section .bss/.data/.text work everywhere?? @@ -148,8 +148,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << ",comdat"; } - if (Unique) - OS << ",unique"; + if (isUnique()) + OS << ",unique," << UniqueID; OS << '\n'; diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index ca3894b..daba321 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -93,9 +93,10 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { const SubtargetInfoKV *Found = std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU); if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) { - errs() << "'" << CPU - << "' is not a recognized processor for this target" - << " (ignoring processor)\n"; + if (CPU != "help") // Don't error if the user asked for help. + errs() << "'" << CPU + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; return MCSchedModel::GetDefaultSchedModel(); } assert(Found->Value && "Missing processor SchedModel value"); diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 2416525..6582574 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -55,13 +55,7 @@ void MCSymbol::setVariableValue(const MCExpr *Value) { assert(!IsUsed && "Cannot set a variable that has already been used."); assert(Value && "Invalid variable value!"); this->Value = Value; - - // Variables should always be marked as in the same "section" as the value. - const MCSection *Section = Value->FindAssociatedSection(); - if (Section) - setSection(*Section); - else - setUndefined(); + this->Section = nullptr; } void MCSymbol::print(raw_ostream &OS) const { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 5e9e86f..837f585 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -649,38 +649,18 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, } } -void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm, - const MCAsmLayout &Layout) { - for (MCSymbolData &SD : Asm.symbols()) { - if (!SD.getSymbol().isVariable()) - continue; - - // If the variable is a symbol difference (SA - SB + C) expression, - // and neither symbol is external, mark the variable as absolute.
- const MCExpr *Expr = SD.getSymbol().getVariableValue(); - MCValue Value; - if (Expr->EvaluateAsRelocatable(Value, &Layout, nullptr)) { - if (Value.getSymA() && Value.getSymB()) - const_cast<MCSymbol*>(&SD.getSymbol())->setAbsolute(); - } - } -} - void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { computeSectionAddresses(Asm, Layout); // Create symbol data for any indirect symbols. BindIndirectSymbols(Asm); - - // Mark symbol difference expressions in variables (from .set or = directives) - // as absolute. - markAbsoluteVariableSymbols(Asm, Layout); } bool MachObjectWriter:: IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, bool IsPCRel) const { @@ -1027,7 +1007,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, } MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, - raw_ostream &OS, + raw_pwrite_stream &OS, bool IsLittleEndian) { return new MachObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index ec6c9cb..b600baf 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -81,11 +81,12 @@ static std::string Join(const std::vector<std::string> &V) { } /// Adding features. -void SubtargetFeatures::AddFeature(StringRef String) { - // Don't add empty features or features we already have. +void SubtargetFeatures::AddFeature(StringRef String, bool Enable) { + // Don't add empty features. if (!String.empty()) // Convert to lowercase, prepend flag if we don't already have a flag. - Features.push_back(hasFlag(String) ? String.str() : "+" + String.lower()); + Features.push_back(hasFlag(String) ? String.lower() + : (Enable ? "+" : "-") + String.lower()); } /// Find KV in array using binary search. diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index c6bc81d..38bb883 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -126,8 +126,8 @@ public: bool UseBigObj; - WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS); - + WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_pwrite_stream &OS); + void reset() override { memset(&Header, 0, sizeof(Header)); Header.Machine = TargetObjectWriter->getMachine(); @@ -172,6 +172,7 @@ public: bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, bool IsPCRel) const override; @@ -257,7 +258,7 @@ size_t COFFSection::size() { // WinCOFFObjectWriter class implementation WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_ostream &OS) + raw_pwrite_stream &OS) : MCObjectWriter(OS, true), TargetObjectWriter(MOTW) { memset(&Header, 0, sizeof(Header)); @@ -382,9 +383,7 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol()); } else { - std::string WeakName = std::string(".weak.") - + Symbol.getName().str() - + ".default"; + std::string WeakName = (".weak." 
+ Symbol.getName() + ".default").str(); COFFSymbol *WeakDefault = createSymbol(WeakName); WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE; WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL; @@ -651,16 +650,17 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, } bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( - const MCAssembler &Asm, const MCSymbolData &DataA, const MCFragment &FB, - bool InSet, bool IsPCRel) const { + const MCAssembler &Asm, const MCSymbolData &DataA, + const MCSymbolData *DataB, const MCFragment &FB, bool InSet, + bool IsPCRel) const { // MS LINK expects to be able to replace all references to a function with a // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize // away any relocations to functions. if ((((DataA.getFlags() & COFF::SF_TypeMask) >> COFF::SF_TypeShift) >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) return false; - return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA, FB, - InSet, IsPCRel); + return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + Asm, DataA, DataB, FB, InSet, IsPCRel); } bool WinCOFFObjectWriter::isWeak(const MCSymbolData &SD) const { @@ -1073,9 +1073,8 @@ void MCWinCOFFObjectTargetWriter::anchor() {} //------------------------------------------------------------------------------ // WinCOFFObjectWriter factory function -namespace llvm { - MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_ostream &OS) { - return new WinCOFFObjectWriter(MOTW, OS); - } +MCObjectWriter * +llvm::createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, + raw_pwrite_stream &OS) { + return new WinCOFFObjectWriter(MOTW, OS); } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index f902d2b..8f9aacb 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -39,7 +39,7 @@ using namespace llvm; namespace llvm { MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &CE, raw_ostream &OS) + MCCodeEmitter &CE, raw_pwrite_stream &OS) : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {} void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst, diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index ad278a4..4c38b8f 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -262,7 +262,7 @@ std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref, } const section_iterator SecEnd = section_end(); uint64_t AfterAddr = UnknownAddressOrSize; - for (const symbol_iterator &SymbI : symbols()) { + for (const symbol_iterator SymbI : symbols()) { section_iterator SecI = SecEnd; if (std::error_code EC = SymbI->getSection(SecI)) return EC; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 4a1c311..7129aa3 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1537,7 +1537,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const { if (Stack.size() != Other.Stack.size()) return false; // Not equal if different cumulative strings. - if (!CumulativeString.str().equals(Other.CumulativeString.str())) + if (!CumulativeString.equals(Other.CumulativeString)) return false; // Equal if all nodes in both stacks match. 
for (unsigned i=0; i < Stack.size(); ++i) { @@ -1559,7 +1559,7 @@ uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) { } StringRef ExportEntry::name() const { - return CumulativeString.str(); + return CumulativeString; } uint64_t ExportEntry::flags() const { diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index 4bc8f92..b771a18 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -395,7 +395,7 @@ Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { - unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str()); + unsigned Index = BaseArgs.MakeIndex((Opt.getName() + Value).str()); SynthesizedArgs.push_back(make_unique<Arg>( Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 2533fa0..228f75e 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -162,7 +162,7 @@ APInt& APInt::operator=(uint64_t RHS) { return clearUnusedBits(); } -/// Profile - This method 'profiles' an APInt for use with FoldingSet. +/// This method 'profiles' an APInt for use with FoldingSet. void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); @@ -176,7 +176,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(pVal[i]); } -/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// This function adds a single "digit" integer, y, to the multiple /// "digit" integer array, x[]. x[] is modified to reflect the addition and /// 1 is returned if there is a carry out, otherwise 0 is returned. /// @returns the carry of the addition. @@ -202,7 +202,7 @@ APInt& APInt::operator++() { return clearUnusedBits(); } -/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from +/// This function subtracts a single "digit" (64-bit word), y, from /// the multi-digit integer array, x[], propagating the borrowed 1 value until /// no further borrowing is neeeded or it runs out of "digits" in x. The result /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. @@ -231,7 +231,7 @@ APInt& APInt::operator--() { return clearUnusedBits(); } -/// add - This function adds the integer array x to the integer array Y and +/// This function adds the integer array x to the integer array Y and /// places the result in dest. /// @returns the carry out from the addition /// @brief General addition of 64-bit integer arrays @@ -680,12 +680,12 @@ bool APInt::isSplat(unsigned SplatSizeInBits) const { return *this == rotl(SplatSizeInBits); } -/// HiBits - This function returns the high "numBits" bits of this APInt. +/// This function returns the high "numBits" bits of this APInt. APInt APInt::getHiBits(unsigned numBits) const { return APIntOps::lshr(*this, BitWidth - numBits); } -/// LoBits - This function returns the low "numBits" bits of this APInt. +/// This function returns the low "numBits" bits of this APInt. APInt APInt::getLoBits(unsigned numBits) const { return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), BitWidth - numBits); @@ -861,7 +861,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { return isNeg ? -Tmp : Tmp; } -/// RoundToDouble - This function converts this APInt to a double. +/// This function converts this APInt to a double. 
/// The layout for double is as following (IEEE Standard 754): /// -------------------------------------- /// | Sign Exponent Fraction Bias | @@ -2269,9 +2269,8 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, std::reverse(Str.begin()+StartDig, Str.end()); } -/// toString - This returns the APInt as a std::string. Note that this is an -/// inefficient method. It is better to pass in a SmallVector/SmallString -/// to the methods above. +/// Returns the APInt as a std::string. Note that this is an inefficient method. +/// It is better to pass in a SmallVector/SmallString to the methods above. std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { SmallString<40> S; toString(S, Radix, Signed, /* formatAsCLiteral = */false); diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index af6c605..3cabc54 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -313,7 +313,7 @@ static Option *LookupNearestOption(StringRef Arg, if (RHS.empty() || !PermitValue) NearestString = OptionNames[i]; else - NearestString = std::string(OptionNames[i]) + "=" + RHS.str(); + NearestString = (Twine(OptionNames[i]) + "=" + RHS).str(); } } } @@ -784,7 +784,7 @@ class StrDupSaver : public StringSaver { std::vector<char *> Dups; public: - ~StrDupSaver() { + ~StrDupSaver() override { for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end(); I != E; ++I) { char *Dup = *I; diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp index a44b958..c243155 100644 --- a/lib/Support/DataStream.cpp +++ b/lib/Support/DataStream.cpp @@ -54,9 +54,7 @@ class DataFileStreamer : public DataStreamer { int Fd; public: DataFileStreamer() : Fd(0) {} - virtual ~DataFileStreamer() { - close(Fd); - } + ~DataFileStreamer() override { close(Fd); } size_t GetBytes(unsigned char *buf, size_t len) override { NumStreamFetches++; return read(Fd, buf, len); diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index a88b18e..eb99242 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -114,9 +114,9 @@ static void debug_user_sig_handler(void *Cookie) { // know that debug mode is enabled and dbgs() really is a // circular_raw_ostream. If NDEBUG is defined, then dbgs() == // errs() but this will never be invoked. - llvm::circular_raw_ostream *dbgout = - static_cast<llvm::circular_raw_ostream *>(&llvm::dbgs()); - dbgout->flushBufferWithBanner(); + llvm::circular_raw_ostream &dbgout = + static_cast<circular_raw_ostream &>(llvm::dbgs()); + dbgout.flushBufferWithBanner(); } /// dbgs - Return a circular-buffered debug stream. diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 80d2aef..b8538ff 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -51,8 +51,8 @@ bool FoldingSetNodeIDRef::operator<(FoldingSetNodeIDRef RHS) const { /// void FoldingSetNodeID::AddPointer(const void *Ptr) { // Note: this adds pointers to the hash using sizes and endianness that - // depend on the host. It doesn't matter however, because hashing on - // pointer values in inherently unstable. Nothing should depend on the + // depend on the host. It doesn't matter, however, because hashing on + // pointer values is inherently unstable. Nothing should depend on the // ordering of nodes in the folding set. 
Bits.append(reinterpret_cast<unsigned *>(&Ptr), reinterpret_cast<unsigned *>(&Ptr+1)); diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index fd4ce54..97aedc8 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -92,7 +92,7 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args, errs() << " done. \n"; } else { sys::ExecuteNoWait(ExecPath, args.data(), nullptr, nullptr, 0, &ErrMsg); - errs() << "Remember to erase graph file: " << Filename.str() << "\n"; + errs() << "Remember to erase graph file: " << Filename << "\n"; } return false; } @@ -140,6 +140,29 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, std::string ViewerPath; GraphSession S; +#ifdef __APPLE__ + if (S.TryFindProgram("open", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + if (wait) + args.push_back("-W"); + args.push_back(Filename.c_str()); + args.push_back(nullptr); + errs() << "Trying 'open' program... "; + if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg)) + return false; + } +#endif + if (S.TryFindProgram("xdg-open", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + args.push_back(Filename.c_str()); + args.push_back(nullptr); + errs() << "Trying 'xdg-open' program... "; + if (!ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg)) + return false; + } + // Graphviz if (S.TryFindProgram("Graphviz", ViewerPath)) { std::vector<const char *> args; diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 0e9a62e..726961a 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -182,19 +182,21 @@ static bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, #endif } -static bool OSHasAVXSupport() { +static bool GetX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) // Check xgetbv; this uses a .byte sequence instead of the instruction // directly because older assemblers do not include support for xgetbv and // there is no easy way to conditionally compile based on the assembler used. - int rEAX, rEDX; - __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (*rEAX), "=d" (*rEDX) : "c" (0)); + return false; #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) - unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + *rEAX = Result; + *rEDX = Result >> 32; + return false; #else - int rEAX = 0; // Ensures we return false + return true; #endif - return (rEAX & 6) == 6; } static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, @@ -223,19 +225,30 @@ StringRef sys::getHostCPUName() { char c[12]; } text; - GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); - - unsigned MaxLeaf = EAX; - bool HasSSE3 = (ECX & 0x1); - bool HasSSE41 = (ECX & 0x80000); - // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + unsigned MaxLeaf; + GetX86CpuIDAndInfo(0, &MaxLeaf, text.u+0, text.u+2, text.u+1); + + bool HasMMX = (EDX >> 23) & 1; + bool HasSSE = (EDX >> 25) & 1; + bool HasSSE2 = (EDX >> 26) & 1; + bool HasSSE3 = (ECX >> 0) & 1; + bool HasSSSE3 = (ECX >> 9) & 1; + bool HasSSE41 = (ECX >> 19) & 1; + bool HasSSE42 = (ECX >> 20) & 1; + bool HasMOVBE = (ECX >> 22) & 1; + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. 
const unsigned AVXBits = (1 << 27) | (1 << 28); - bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport(); - bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 && - !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) && - (EBX & 0x20); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !GetX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + bool HasLeaf7 = MaxLeaf >= 0x7 && + !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); + bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); + bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); + GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; bool HasTBM = (ECX >> 21) & 0x1; @@ -298,6 +311,8 @@ StringRef sys::getHostCPUName() { case 9: // Intel Pentium M processor, Intel Celeron M processor model 09. case 13: // Intel Pentium M processor, Intel Celeron M processor, model // 0Dh. All processors are manufactured using the 90 nm process. + case 21: // Intel EP80579 Integrated Processor and Intel EP80579 + // Integrated Processor with Intel QuickAssist Technology return "pentium-m"; case 14: // Intel Core Duo processor, Intel Core Solo processor, model @@ -313,74 +328,85 @@ StringRef sys::getHostCPUName() { // manufactured using the 65 nm process return "core2"; - case 21: // Intel EP80579 Integrated Processor and Intel EP80579 - // Integrated Processor with Intel QuickAssist Technology - return "i686"; // FIXME: ??? - case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) - // Not all Penryn processors support SSE 4.1 (such as the Pentium brand) - return HasSSE41 ? "penryn" : "core2"; + case 29: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + return "penryn"; case 26: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. - case 29: // Intel Xeon processor MP. All processors are manufactured using - // the 45 nm process. case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. + case 46: // Nehalem EX + return "nehalem"; case 37: // Intel Core i7, laptop version. case 44: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. - case 46: // Nehalem EX case 47: // Westmere EX - return "corei7"; + return "westmere"; // SandyBridge: case 42: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 45: - // Not all Sandy Bridge processors support AVX (such as the Pentium - // versions instead of the i7 versions). - return HasAVX ? "corei7-avx" : "corei7"; + return "sandybridge"; // Ivy Bridge: case 58: case 62: // Ivy Bridge EP - // Not all Ivy Bridge processors support AVX (such as the Pentium - // versions instead of the i7 versions). - return HasAVX ? "core-avx-i" : "corei7"; + return "ivybridge"; // Haswell: case 60: case 63: case 69: case 70: - // Not all Haswell processors support AVX2 (such as the Pentium - // versions instead of the i7 versions). - return HasAVX2 ? "core-avx2" : "corei7"; + return "haswell"; // Broadwell: case 61: - // Not all Broadwell processors support AVX2 (such as the Pentium - // versions instead of the i7 versions). - return HasAVX2 ? 
"broadwell" : "corei7"; + return "broadwell"; case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield case 53: // 32 nm Atom Midview case 54: // 32 nm Atom Midview - return "atom"; + return "bonnell"; // Atom Silvermont codes from the Intel software optimization guide. case 55: case 74: case 77: - return "slm"; - - default: return (Em64T) ? "x86-64" : "i686"; + return "silvermont"; + + default: // Unknown family 6 CPU, try to guess. + if (HasAVX512) + return "knl"; + if (HasADX) + return "broadwell"; + if (HasAVX2) + return "haswell"; + if (HasAVX) + return "sandybridge"; + if (HasSSE42) + return HasMOVBE ? "silvermont" : "nehalem"; + if (HasSSE41) + return "penryn"; + if (HasSSSE3) + return HasMOVBE ? "bonnell" : "core2"; + if (Em64T) + return "x86-64"; + if (HasSSE2) + return "pentium-m"; + if (HasSSE) + return "pentium3"; + if (HasMMX) + return "pentium2"; + return "pentiumpro"; } case 15: { switch (Model) { @@ -681,7 +707,89 @@ StringRef sys::getHostCPUName() { } #endif -#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) +bool sys::getHostCPUFeatures(StringMap<bool> &Features) { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + unsigned MaxLevel; + union { + unsigned u[3]; + char c[12]; + } text; + + if (GetX86CpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) || + MaxLevel < 1) + return false; + + GetX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + + Features["cmov"] = (EDX >> 15) & 1; + Features["mmx"] = (EDX >> 23) & 1; + Features["sse"] = (EDX >> 25) & 1; + Features["sse2"] = (EDX >> 26) & 1; + Features["sse3"] = (ECX >> 0) & 1; + Features["ssse3"] = (ECX >> 9) & 1; + Features["sse4.1"] = (ECX >> 19) & 1; + Features["sse4.2"] = (ECX >> 20) & 1; + + Features["pclmul"] = (ECX >> 1) & 1; + Features["cx16"] = (ECX >> 13) & 1; + Features["movbe"] = (ECX >> 22) & 1; + Features["popcnt"] = (ECX >> 23) & 1; + Features["aes"] = (ECX >> 25) & 1; + Features["rdrnd"] = (ECX >> 30) & 1; + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. + bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && + !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); + Features["avx"] = HasAVX; + Features["fma"] = HasAVX && (ECX >> 12) & 1; + Features["f16c"] = HasAVX && (ECX >> 29) & 1; + + // AVX512 requires additional context to be saved by the OS. + bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + unsigned MaxExtLevel; + GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); + Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); + Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); + Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1); + Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1); + Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); + + bool HasLeaf7 = MaxLevel >= 7 && + !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + // AVX2 is only supported if we have the OS save support from AVX. 
+ Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1; + + Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); + Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); + Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1); + Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); + Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); + Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); + Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); + Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); + + // AVX512 is only supported if the OS supports the context save for it. + Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; + Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; + Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; + Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; + Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; + Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; + Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; + + return true; +} +#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) bool sys::getHostCPUFeatures(StringMap<bool> &Features) { // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line // in all cases. diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index d0c1748..3571cd3 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -26,24 +26,6 @@ using namespace sys; //=== independent code. //===----------------------------------------------------------------------===// -/// \brief A helper function to compute the elapsed wall-time since the program -/// started. -/// -/// Note that this routine actually computes the elapsed wall time since the -/// first time it was called. However, we arrange to have it called during the -/// startup of the process to get approximately correct results. -static TimeValue getElapsedWallTime() { - static TimeValue &StartTime = *new TimeValue(TimeValue::now()); - return TimeValue::now() - StartTime; -} - -/// \brief A special global variable to ensure we call \c getElapsedWallTime -/// during global initialization of the program. -/// -/// Note that this variable is never referenced elsewhere. Doing so could -/// create race conditions during program startup or shutdown. 
-static volatile TimeValue DummyTimeValue = getElapsedWallTime(); - Optional<std::string> Process::FindInEnvPath(const std::string& EnvName, const std::string& FileName) { diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index d3e29ac..e8344ef 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -15,6 +15,7 @@ #include "regex_impl.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include <string> using namespace llvm; @@ -158,7 +159,7 @@ std::string Regex::sub(StringRef Repl, StringRef String, RefValue < Matches.size()) Res += Matches[RefValue]; else if (Error && Error->empty()) - *Error = "invalid backreference string '" + Ref.str() + "'"; + *Error = ("invalid backreference string '" + Twine(Ref) + "'").str(); break; } } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index d4b150a..5b43ecc 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -92,7 +92,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case sparcv9: case sparc: return "sparc"; - case systemz: return "systemz"; + case systemz: return "s390"; case x86: case x86_64: return "x86"; @@ -1111,7 +1111,7 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const { .Cases("v7m", "v7-m", "cortex-m3") .Cases("v7em", "v7e-m", "cortex-m4") .Cases("v8", "v8a", "v8-a", "cortex-a53") - .Cases("v8.1a", "v8.1-a", "generic-armv8.1-a") + .Cases("v8.1a", "v8.1-a", "generic") .Default(nullptr); else result = llvm::StringSwitch<const char *>(MArch) diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index a9b48e0..057bcab1 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -486,12 +486,12 @@ void llvm::sys::DisableSystemDialogsOnCrash() {} /// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. -void llvm::sys::PrintStackTraceOnErrorSignal() { +void llvm::sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) { AddSignalHandler(PrintStackTraceSignalHandler, nullptr); #if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) // Environment variable to disable any kind of crash dialog. - if (getenv("LLVM_DISABLE_CRASH_REPORT")) { + if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) { mach_port_t self = mach_task_self(); exception_mask_t mask = EXC_MASK_CRASH; diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index d558ff5..b5523aa 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -261,6 +261,7 @@ std::error_code rename(const Twine &from, const Twine &to) { MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) return std::error_code(); DWORD LastError = ::GetLastError(); + ec = windows_error(LastError); if (LastError != ERROR_ACCESS_DENIED) break; // Retry MoveFile() at ACCESS_DENIED. diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index de6bf1c..f070111 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -389,7 +389,7 @@ void sys::DisableSystemDialogsOnCrash() { /// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. 
-void sys::PrintStackTraceOnErrorSignal() { +void sys::PrintStackTraceOnErrorSignal(bool DisableCrashReporting) { DisableSystemDialogsOnCrash(); RegisterHandler(); LeaveCriticalSection(&CriticalSection); diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc index 0223ab4..b90b4f1 100644 --- a/lib/Support/Windows/TimeValue.inc +++ b/lib/Support/Windows/TimeValue.inc @@ -47,6 +47,7 @@ std::string TimeValue::str() const { __time64_t OurTime = this->toEpochTime(); int Error = ::_localtime64_s(&Storage, &OurTime); assert(!Error); + (void)Error; LT = &Storage; #endif diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 051e2dd..6f9f910 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -487,51 +487,53 @@ void format_object_base::home() { // raw_fd_ostream //===----------------------------------------------------------------------===// -raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, - sys::fs::OpenFlags Flags) - : Error(false), UseAtomicWrites(false), pos(0) { - EC = std::error_code(); +static int getFD(StringRef Filename, std::error_code &EC, + sys::fs::OpenFlags Flags) { // Handle "-" as stdout. Note that when we do this, we consider ourself // the owner of stdout. This means that we can do things like close the // file descriptor when we're done and set the "binary" flag globally. if (Filename == "-") { - FD = STDOUT_FILENO; + EC = std::error_code(); // If user requested binary then put stdout into binary mode if // possible. if (!(Flags & sys::fs::F_Text)) sys::ChangeStdoutToBinary(); - // Close stdout when we're done, to detect any output errors. - ShouldClose = true; - return; + return STDOUT_FILENO; } + int FD; EC = sys::fs::openFileForWrite(Filename, FD, Flags); + if (EC) + return -1; - if (EC) { - ShouldClose = false; - return; - } - - // Ok, we successfully opened the file, so it'll need to be closed. - ShouldClose = true; + return FD; } -/// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If -/// ShouldClose is true, this closes the file when the stream is destroyed. +raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, + sys::fs::OpenFlags Flags) + : raw_fd_ostream(getFD(Filename, EC, Flags), true) {} + +/// FD is the file descriptor that this writes to. If ShouldClose is true, this +/// closes the file when the stream is destroyed. raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) - : raw_ostream(unbuffered), FD(fd), - ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) { -#ifdef O_BINARY - // Setting STDOUT to binary mode is necessary in Win32 - // to avoid undesirable linefeed conversion. - // Don't touch STDERR, or w*printf() (in assert()) would barf wide chars. - if (fd == STDOUT_FILENO) - setmode(fd, O_BINARY); -#endif + : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose), + Error(false), UseAtomicWrites(false) { + if (FD < 0 ) { + ShouldClose = false; + return; + } // Get the starting position. off_t loc = ::lseek(FD, 0, SEEK_CUR); - if (loc == (off_t)-1) +#ifdef LLVM_ON_WIN32 + // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes. 
+ sys::fs::file_status Status; + std::error_code EC = status(FD, Status); + SupportsSeeking = !EC && Status.type() == sys::fs::file_type::regular_file; +#else + SupportsSeeking = loc != (off_t)-1; +#endif + if (!SupportsSeeking) pos = 0; else pos = static_cast<uint64_t>(loc); @@ -623,11 +625,18 @@ void raw_fd_ostream::close() { uint64_t raw_fd_ostream::seek(uint64_t off) { flush(); pos = ::lseek(FD, off, SEEK_SET); - if (pos != off) + if (pos == (uint64_t)-1) error_detected(); return pos; } +void raw_fd_ostream::pwrite(const char *Ptr, size_t Size, uint64_t Offset) { + uint64_t Pos = tell(); + seek(Offset); + write(Ptr, Size); + seek(Pos); +} + size_t raw_fd_ostream::preferred_buffer_size() const { #if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix) // Windows and Minix have no st_blksize. @@ -708,7 +717,9 @@ raw_ostream &llvm::outs() { // Set buffer settings to model stdout behavior. // Delete the file descriptor when the program exits, forcing error // detection. If you don't want this behavior, don't use outs(). - static raw_fd_ostream S(STDOUT_FILENO, true); + std::error_code EC; + static raw_fd_ostream S("-", EC, sys::fs::F_None); + assert(!EC); return S; } @@ -749,7 +760,14 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { // capacity. This allows raw_ostream to write directly into the correct place, // and we only need to set the vector size when the data is flushed. +raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O, unsigned) + : OS(O) {} + raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) { + init(); +} + +void raw_svector_ostream::init() { // Set up the initial external buffer. We make sure that the buffer has at // least 128 bytes free; raw_ostream itself only requires 64, but we want to // make sure that we don't grow the buffer unnecessarily on destruction (when @@ -763,6 +781,17 @@ raw_svector_ostream::~raw_svector_ostream() { flush(); } +void raw_svector_ostream::pwrite(const char *Ptr, size_t Size, + uint64_t Offset) { + flush(); + + uint64_t End = Offset + Size; + if (End > OS.size()) + OS.resize(End); + + memcpy(OS.begin() + Offset, Ptr, Size); +} + /// resync - This is called when the SmallVector we're appending to is changed /// outside of the raw_svector_ostream's control. It is only safe to do this /// if the raw_svector_ostream has previously been flushed. @@ -817,3 +846,5 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) { uint64_t raw_null_ostream::current_pos() const { return 0; } + +void raw_null_ostream::pwrite(const char *Ptr, size_t Size, uint64_t Offset) {} diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 4ae9903..8a8f0ee 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -2040,7 +2040,7 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const { /// to CurRec's name. 
Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name, const std::string &Scoper) { - RecTy *Type = dyn_cast<TypedInit>(Name)->getType(); + RecTy *Type = cast<TypedInit>(Name)->getType(); BinOpInit *NewName = BinOpInit::get(BinOpInit::STRCONCAT, diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h index 1f750fc..cbc30be 100644 --- a/lib/TableGen/TGLexer.h +++ b/lib/TableGen/TGLexer.h @@ -87,8 +87,7 @@ private: public: TGLexer(SourceMgr &SrcMgr); - ~TGLexer() {} - + tgtok::TokKind Lex() { return CurCode = LexToken(); } diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index bb3db4b..9a7d6c8 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -32,9 +32,6 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; -def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", - "Enable ARMv8.1a extensions", [FeatureCRC]>; - /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -44,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; //===----------------------------------------------------------------------===// +// Architectures. +// + +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", + "Support ARM v8.1a instructions", [FeatureCRC]>; + +//===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -92,10 +96,6 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON, FeatureCRC]>; -def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a, - FeatureNEON, - FeatureCrypto]>; - def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. @@ -123,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant { // AsmWriter bits get associated with the correct class. def GenericAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; int Variant = 0; bit isMCAsmWriter = 1; } def AppleAsmWriter : AsmWriter { let AsmWriterClassName = "AppleInstPrinter"; + int PassSubtarget = 1; int Variant = 1; int isMCAsmWriter = 1; } diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 1b4483a..0821cff 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -131,29 +131,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); SM.serializeToStackMapSection(); } - - // Emit a .data.rel section containing any stubs that were created. - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. 
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } - } MachineLocation @@ -371,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, assert(NOps == 4); OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata())); - OS << V.getName(); + OS << cast<MDLocalVariable>(MI->getOperand(NOps - 2).getMetadata()) + ->getName(); OS << " <- "; // Frame address. Currently handles register +- offset only. assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index 568f258..efdb2e3 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -328,7 +328,7 @@ static void initReachingDef(const MachineFunction &MF, const uint32_t *PreservedRegs = MO.getRegMask(); // Set generated regs. - for (const auto Entry : RegToId) { + for (const auto &Entry : RegToId) { unsigned Reg = Entry.second; // Use the global register ID when querying APIs external to this // pass. diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 41b1132..c2470f7 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandMOVImm(MBB, MBBI, 32); case AArch64::MOVi64imm: return expandMOVImm(MBB, MBBI, 64); - case AArch64::RET_ReallyLR: - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) - .addReg(AArch64::LR); + case AArch64::RET_ReallyLR: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET)) + .addReg(AArch64::LR); + transferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; } + } return false; } diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 99cb641..c3f6859 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1917,7 +1917,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // could select it. Emit a copy to subreg if necessary. FastISel will remove // it when it selects the integer extend. unsigned Reg = lookUpRegForValue(IntExtVal); - if (!Reg) { + auto *MI = MRI.getUniqueVRegDef(Reg); + if (!MI) { if (RetVT == MVT::i64 && VT <= MVT::i32) { if (WantZExt) { // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). @@ -1935,10 +1936,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { // The integer extend has already been emitted - delete all the instructions // that have been emitted by the integer extend lowering code and use the // result from the load instruction directly. 
- while (Reg) { - auto *MI = MRI.getUniqueVRegDef(Reg); - if (!MI) - break; + while (MI) { Reg = 0; for (auto &Opnd : MI->uses()) { if (Opnd.isReg()) { @@ -1947,6 +1945,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { } } MI->eraseFromParent(); + MI = nullptr; + if (Reg) + MI = MRI.getUniqueVRegDef(Reg); } updateValueMap(IntExtVal, ResultReg); return true; @@ -3034,6 +3035,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, // Copy all of the result registers out of their specified physreg. MVT CopyVT = RVLocs[0].getValVT(); + + // TODO: Handle big-endian results + if (CopyVT.isVector() && !Subtarget->isLittleEndian()) + return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 84bf317..bd2af16 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -9,6 +9,82 @@ // // This file contains the AArch64 implementation of TargetFrameLowering class. // +// On AArch64, stack frames are structured as follows: +// +// The stack grows downward. +// +// All of the individual frame areas on the frame below are optional, i.e. it's +// possible to create a function so that the particular area isn't present +// in the frame. +// +// At function entry, the "frame" looks as follows: +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| <- sp +// | | Lower address +// +// +// After the prologue has run, the frame has the following general structure. +// Note that this doesn't depict the case where a red-zone is used. Also, +// technically the last frame area (VLAs) doesn't get created until in the +// main function body, after the prologue is run. However, it's depicted here +// for completeness. +// +// | | Higher address +// |-----------------------------------| +// | | +// | arguments passed on the stack | +// | | +// |-----------------------------------| +// | | +// | prev_fp, prev_lr | +// | (a.k.a. "frame record") | +// |-----------------------------------| <- fp(=x29) +// | | +// | other callee-saved registers | +// | | +// |-----------------------------------| +// |.empty.space.to.make.part.below....| +// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at +// |.the.standard.16-byte.alignment....| compile time; if present) +// |-----------------------------------| +// | | +// | local variables of fixed size | +// | including spill slots | +// |-----------------------------------| <- bp(not defined by ABI, +// |.variable-sized.local.variables....| LLVM chooses X19) +// |.(VLAs)............................| (size of this area is unknown at +// |...................................| compile time) +// |-----------------------------------| <- sp +// | | Lower address +// +// +// To access the data in a frame, at-compile time, a constant offset must be +// computable from one of the pointers (fp, bp, sp) to access it. The size +// of the areas with a dotted background cannot be computed at compile-time +// if they are present, making it required to have all three of fp, bp and +// sp to be set up to be able to access all contents in the frame areas, +// assuming all of the frame areas are non-empty. +// +// For most functions, some of the frame areas are empty. 
For those functions, +// it may not be necessary to set up fp or bp: +// * A base pointer is definitly needed when there are both VLAs and local +// variables with more-than-default alignment requirements. +// * A frame pointer is definitly needed when there are local variables with +// more-than-default alignment requirements. +// +// In some cases when a base pointer is not strictly needed, it is generated +// anyway when offsets from the frame pointer to access local variables become +// so large that the offset can't be encoded in the immediate fields of loads +// or stores. +// +// FIXME: also explain the redzone concept. +// FIXME: also explain the concept of reserved call frames. +// //===----------------------------------------------------------------------===// #include "AArch64FrameLowering.h" @@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { /// pointer register. bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on AArch64!"); -#endif - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasStackMap() || - MFI->hasPatchPoint()); + MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF)); } /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setLocalStackSize(NumBytes); // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + + const unsigned Alignment = MFI->getMaxAlignment(); + const bool NeedsRealignment = (Alignment > 16); + unsigned scratchSPReg = AArch64::SP; + if (NeedsRealignment) { + // Use the first callee-saved register as a scratch register + assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && + "No scratch register to align SP!"); + scratchSPReg = AArch64::X9; + } + + // If we're a leaf function, try using the red zone. + if (NumBytes && !canUseRedZone(MF)) + // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have + // the correct value here, as NumBytes also includes padding bytes, + // which shouldn't be counted here. 
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, + MachineInstr::FrameSetup); + + assert(!(NeedsRealignment && NumBytes==0) && + "NumBytes should never be 0 when realignment is needed"); + + if (NumBytes && NeedsRealignment) { + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(NrBitsToZero > 1); + assert(scratchSPReg != AArch64::SP); + + // SUB X9, SP, NumBytes + // -- X9 is temporary register, so shouldn't contain any live data here, + // -- free to use. This is already produced by emitFrameOffset above. + // AND SP, X9, 0b11111...0000 + // The logical immediates have a non-trivial encoding. The following + // formula computes the encoded immediate with all ones but + // NrBitsToZero zero bits as least significant bits. + uint32_t andMaskEncoded = + (1 <<12) // = N + | ((64-NrBitsToZero) << 6) // immr + | ((64-NrBitsToZero-1) << 0) // imms + ; + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP) + .addReg(scratchSPReg, RegState::Kill) + .addImm(andMaskEncoded); } // If we need a base pointer, set it up here. It's whatever the value of the @@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); + if (RegInfo->hasBasePointer(MF)) { + TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, + false); + } if (needsFrameMoves) { const DataLayout *TD = MF.getTarget().getDataLayout(); const int StackGrowth = -TD->getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // An example of the prologue: // // .globl __foo @@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; - // Initial and residual are named for consitency with the prologue. Note that + // Initial and residual are named for consistency with the prologue. Note that // in the epilogue, the residual adjustment is executed first. uint64_t ArgumentPopSize = 0; if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { @@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, bool isFixed = MFI->isFixedObjectIndex(FI); // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. + // there are VLAs or a dynamically realigned SP (and thus the SP isn't + // reliable as a base). Make sure useFPForScavengingIndex() does the + // right thing for the emergency spill slot. bool UseFP = false; if (AFI->hasStackFrame()) { // Note: Keeping the following as multiple 'if' statements rather than @@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, // Argument access should always use the FP. if (isFixed) { UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { + } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) && + !RegInfo->needsStackRealignment(MF)) { // Use SP or FP, whichever gives us the best chance of the offset // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. 
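The AND-mask computation in the prologue hunk above packs an AArch64 logical immediate by hand: N = 1 selects a 64-bit element, immr is the rotate-right amount, and imms encodes the number of set bits minus one. As a sanity check on the formula, here is a minimal standalone C++ sketch (illustrative only, not LLVM code; the helper names are invented) that encodes the immediate for a given alignment and decodes it back to the mask the AND applies to SP:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Encode the mask for "AND SP, Xn, #imm" exactly as the hunk above does:
    // N = 1 (64-bit element), immr = 64 - k, imms = 64 - k - 1, where k is
    // the number of low bits to clear (countTrailingZeros(Alignment)).
    static uint32_t encodeAlignMask(unsigned NrBitsToZero) {
      assert(NrBitsToZero > 1 && NrBitsToZero < 64);
      return (1u << 12)                    // N
             | ((64 - NrBitsToZero) << 6)  // immr
             | (64 - NrBitsToZero - 1);    // imms
    }

    // Decode it back: (imms + 1) consecutive ones, rotated right by immr.
    static uint64_t decodeAlignMask(uint32_t Enc) {
      unsigned Immr = (Enc >> 6) & 0x3f;
      unsigned Imms = Enc & 0x3f;
      uint64_t Ones = ~0ULL >> (63 - Imms);
      return Immr ? (Ones >> Immr) | (Ones << (64 - Immr)) : Ones;
    }

    int main() {
      // 32-byte stack alignment: countTrailingZeros(32) == 5.
      uint32_t Enc = encodeAlignMask(5);
      printf("encoded 0x%x -> mask 0x%016llx\n", Enc,
             (unsigned long long)decodeAlignMask(Enc));
      // Prints mask 0xffffffffffffffe0, i.e. SP & ~(32 - 1).
      return 0;
    }

Decoding confirms the claim in the comment: all bits set except the NrBitsToZero least significant ones, so the AND rounds SP down to the requested alignment.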
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, } } + assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) && + "In the presence of dynamic stack pointer realignment, " + "non-argument objects cannot be accessed through the frame pointer"); + if (UseFP) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -695,6 +787,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre) MIB.addReg(AArch64::SP, RegState::Define); + MBB.addLiveIn(Reg1); + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) @@ -794,6 +888,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (RegInfo->hasBasePointer(MF)) MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) + MRI->setPhysRegUsed(AArch64::X9); + // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; unsigned NumFPRSpilled = 0; @@ -867,7 +964,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); + unsigned CFSize = + MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index df3875f..1439bf3 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering { public: explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/) {} + true /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 0a47dcb..f75700d 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -848,7 +848,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, // MOV X0, WideImmediate // LDR X2, [BaseReg, X0] if (isa<ConstantSDNode>(RHS)) { - int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue(); + int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); // Skip the immediate can be seleced by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 0c0e856..90a5e5e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -281,14 +281,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - // f16 is storage-only, so we promote operations to f32 if we know this is - // valid, and ignore them otherwise. 
The operations not mentioned here will - // fail to select, but this is not a major problem as no source language - // should be emitting native f16 operations yet. - setOperationAction(ISD::FADD, MVT::f16, Promote); - setOperationAction(ISD::FDIV, MVT::f16, Promote); - setOperationAction(ISD::FMUL, MVT::f16, Promote); - setOperationAction(ISD::FSUB, MVT::f16, Promote); + // f16 is a storage-only type, always promote it to f32. + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. @@ -481,6 +506,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Enable TBZ/TBNZ MaskAndBranchFoldingIsLegal = true; + EnableExtLdPromotion = true; setMinFunctionAlignment(2); @@ -1557,6 +1583,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, if (Op.getOperand(0).getValueType().isVector()) return LowerVectorFP_TO_INT(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getOperand(0).getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0))); + } + if (Op.getOperand(0).getValueType() != MVT::f128) { // It's legal except when f128 is involved return Op; @@ -1606,6 +1640,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType().isVector()) return LowerVectorINT_TO_FP(Op, DAG); + // f16 conversions are promoted to f32. + if (Op.getValueType() == MVT::f16) { + SDLoc dl(Op); + return DAG.getNode( + ISD::FP_ROUND, dl, MVT::f16, + DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)), + DAG.getIntPtrConstant(0)); + } + // i128 conversions are libcalls. 
if (Op.getOperand(0).getValueType() == MVT::i128) return SDValue(); @@ -2701,8 +2744,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), - /*isVol = */ false, - /*AlwaysInline = */ false, DstInfo, MachinePointerInfo()); + /*isVol = */ false, /*AlwaysInline = */ false, + /*isTailCall = */ false, + DstInfo, MachinePointerInfo()); MemOpChains.push_back(Cpy); } else { @@ -3514,49 +3558,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, - SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. - if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0))) - return SDValue(); - - AArch64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, - CCVal, Overflow); - } - - if (CC.getOpcode() == ISD::SETCC) - return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal, - cast<CondCodeSDNode>(CC.getOperand(2))->get()); - else - return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal, - FVal, ISD::SETNE); -} - -SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, +SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, + SDValue RHS, SDValue TVal, + SDValue FVal, SDLoc dl, SelectionDAG &DAG) const { - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue TVal = Op.getOperand(2); - SDValue FVal = Op.getOperand(3); - SDLoc dl(Op); - // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { @@ -3664,14 +3669,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SDValue CCVal; SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp); } // Now we know we're dealing with FP values. assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); assert(LHS.getValueType() == RHS.getValueType()); - EVT VT = Op.getValueType(); + EVT VT = TVal.getValueType(); // Try to match this select into a max/min operation, which have dedicated // opcode in the instruction set. 
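The f16 hunks in this file make every fp16/integer conversion round-trip through f32, since f16 is a storage-only type on this target. In scalar C terms the promoted code behaves like the sketch below (illustrative only; _Float16 availability depends on the compiler, and a GCC/Clang extension is assumed):

    #include <cstdint>

    // f16 -> int: the operand is widened to f32 first (the inserted
    // ISD::FP_EXTEND), then the original FP_TO_SINT runs on the f32 value.
    int32_t fp16_to_i32(_Float16 X) {
      float Wide = (float)X;
      return (int32_t)Wide;
    }

    // int -> f16: the conversion happens in f32 (the original SINT_TO_FP,
    // retyped), and the result is narrowed back with ISD::FP_ROUND.
    _Float16 i32_to_fp16(int32_t X) {
      float Wide = (float)X;
      return (_Float16)Wide;
    }

This is the same promote-through-f32 pattern that the long list of setOperationAction(..., MVT::f16, Promote) calls earlier in the hunk requests for the remaining floating-point operations.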
@@ -3732,6 +3737,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, return CS1; } +SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TVal = Op.getOperand(2); + SDValue FVal = Op.getOperand(3); + SDLoc DL(Op); + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { + SDValue CCVal = Op->getOperand(0); + SDValue TVal = Op->getOperand(1); + SDValue FVal = Op->getOperand(2); + SDLoc DL(Op); + + unsigned Opc = CCVal.getOpcode(); + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select + // instruction. + if (CCVal.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) + return SDValue(); + + AArch64CC::CondCode OFCC; + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); + SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); + + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, + CCVal, Overflow); + } + + // Lower it the same way as we would lower a SELECT_CC node. + ISD::CondCode CC; + SDValue LHS, RHS; + if (CCVal.getOpcode() == ISD::SETCC) { + LHS = CCVal.getOperand(0); + RHS = CCVal.getOperand(1); + CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get(); + } else { + LHS = CCVal; + RHS = DAG.getConstant(0, CCVal.getValueType()); + CC = ISD::SETNE; + } + return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); +} + SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // Jump table entries as PC relative offsets. No additional tweaking @@ -3920,7 +3977,7 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), + 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -4989,7 +5046,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, unsigned Opcode; if (EltTy == MVT::i8) Opcode = AArch64ISD::DUPLANE8; - else if (EltTy == MVT::i16) + else if (EltTy == MVT::i16 || EltTy == MVT::f16) Opcode = AArch64ISD::DUPLANE16; else if (EltTy == MVT::i32 || EltTy == MVT::f32) Opcode = AArch64ISD::DUPLANE32; @@ -6554,6 +6611,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { VT1.getSizeInBits() <= 32); } +bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { + if (isa<FPExtInst>(Ext)) + return false; + + // Vector types are next free. + if (Ext->getType()->isVectorTy()) + return false; + + for (const Use &U : Ext->uses()) { + // The extension is free if we can fold it with a left shift in an + // addressing mode or an arithmetic operation: add, sub, and cmp. + + // Is there a shift? + const Instruction *Instr = cast<Instruction>(U.getUser()); + + // Is this a constant shift? 
+ switch (Instr->getOpcode()) { + case Instruction::Shl: + if (!isa<ConstantInt>(Instr->getOperand(1))) + return false; + break; + case Instruction::GetElementPtr: { + gep_type_iterator GTI = gep_type_begin(Instr); + std::advance(GTI, U.getOperandNo()); + Type *IdxTy = *GTI; + // This extension will end up with a shift because of the scaling factor. + // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0. + // Get the shift amount based on the scaling factor: + // log2(sizeof(IdxTy)) - log2(8). + uint64_t ShiftAmt = + countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3; + // Is the constant foldable in the shift of the addressing mode? + // I.e., shift amount is between 1 and 4 inclusive. + if (ShiftAmt == 0 || ShiftAmt > 4) + return false; + break; + } + case Instruction::Trunc: + // Check if this is a noop. + // trunc(sext ty1 to ty2) to ty1. + if (Instr->getType() == Ext->getOperand(0)->getType()) + continue; + // FALL THROUGH. + default: + return false; + } + + // At this point we can use the bfm family, so this extension is free + // for that use. + } + return true; +} + bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, unsigned &RequiredAligment) const { if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) @@ -6597,7 +6707,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast))) return MVT::f128; - return Size >= 8 ? MVT::i64 : MVT::i32; + if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast))) + return MVT::i64; + + if (Size >= 4 && + (memOpAlign(SrcAlign, DstAlign, 4) || + (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast))) + return MVT::i32; + + return MVT::Other; } // 12-bit optionally shifted immediates are legal for adds. diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 5ff11e8..820613b 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -355,6 +355,8 @@ public: getPreferredVectorAction(EVT VT) const override; private: + bool isExtFreeImpl(const Instruction *Ext) const override; + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. const AArch64Subtarget *Subtarget; @@ -418,6 +420,9 @@ private: SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, + SDValue TVal, SDValue FVal, SDLoc dl, + SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d295c02..0c0efaf 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic, SDPatternOperator OpNode = null_frag> { let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in { // Add/Subtract immediate + // Increase the weight of the immediate variant to try to match it before + // the extended register variant. 
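// (Editor's note, not in the patch: AddedComplexity is added to the pattern
// complexity TableGen computes, and patterns are tried highest-first, so the
// value of 6 below lifts the Wri/Xri immediate forms ahead of the
// extended-register forms during instruction selection.)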
+ // We used to match the register variant before the immediate when the + // register argument could be implicitly zero-extended. + let AddedComplexity = 6 in def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32, mnemonic, OpNode> { let Inst{31} = 0; } + let AddedComplexity = 6 in def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64, mnemonic, OpNode> { let Inst{31} = 1; @@ -3282,6 +3288,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0, : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> { bits<5> Rt; bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; let Inst{9-5} = Rn; let Inst{4-0} = Rt; @@ -5298,6 +5308,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode, let Inst{4-0} = Rd; } +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list<dag> pattern> + : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm, @@ -5325,6 +5356,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm, def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>; } +multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst), + (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm), + asm, []>; + def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst), + (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm), + asm, []>; +} + multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -5885,7 +5926,7 @@ multiclass SIMDIns { let Inst{20-18} = idx; let Inst{17-16} = 0b10; let Inst{14-12} = idx2; - let Inst{11} = 0; + let Inst{11} = {?}; } def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { bits<2> idx; @@ -5893,7 +5934,7 @@ multiclass SIMDIns { let Inst{20-19} = idx; let Inst{18-16} = 0b100; let Inst{14-13} = idx2; - let Inst{12-11} = 0; + let Inst{12-11} = {?,?}; } def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { bits<1> idx; @@ -5901,7 +5942,7 @@ multiclass SIMDIns { let Inst{20} = idx; let Inst{19-16} = 0b1000; let Inst{14} = idx2; - let Inst{13-11} = 0; + let Inst{13-11} = {?,?,?}; } // For all forms of the INS instruction, the "mov" mnemonic is the @@ -8517,6 +8558,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> { } // end of 'let Predicates = [HasNEON]' //---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasV8_1a] in { + +class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list<dag> pattern> + : 
BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind, + pattern> { + let Inst{21}=0; +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
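// (Editor's note, not part of the patch: the patterns in this multiclass
// model SQRDMLAH/SQRDMLSH as a saturating accumulate of the sqrdmulh result,
// i.e. Accum(Rd, sqrdmulh(Rn, Rm)). A scalar sketch of that composition for
// one i16 lane, with sat16() as an assumed clamp-to-[INT16_MIN, INT16_MAX]
// helper:
//   int16_t sqrdmulh16(int16_t a, int16_t b) {
//     return sat16((2 * (int32_t)a * (int32_t)b + (1 << 15)) >> 16);
//   }
//   int16_t sqrdmlah16(int16_t rd, int16_t rn, int16_t rm) {
//     return sat16((int32_t)rd + (int32_t)sqrdmulh16(rn, rm));
//   }
// This is the intrinsic-level view: the patterns here fuse the existing
// sqadd/sqsub-of-sqrdmulh pair into the single v8.1a instruction.)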
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (v2i32 (!cast<Instruction>(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast<Instruction>(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasV8_1a] + +//---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 8e0af2d..db231c4 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple( } for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); + const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); @@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI); @@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( } assert(Opc && "Unknown register class"); - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + const MachineInstrBuilder MI = BuildMI(MBB, 
MBBI, DL, get(Opc)) .addReg(DestReg, getDefRegState(true)) .addFrameIndex(FI); if (Offset) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index ec6fa5c..f7db50a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, @@ -22,8 +24,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, - AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -2314,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; +let Predicates = [HasV8_1a] in { + // v8.1a "Limited Order Region" extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8.1a "Limited Order Region" extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + //===----------------------------------------------------------------------===// // Scaled floating point to integer conversion instructions. 
//===----------------------------------------------------------------------===// @@ -2778,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", @@ -2994,6 +3012,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl> defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasV8_1a] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -3478,13 +3510,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), // AdvSIMD INS/DUP instructions //---------------------------------------------------------------------------- -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; +def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; +def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; +def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; +def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; +def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; +def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; +def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; def DUPv2i64lane : SIMDDup64FromElement; def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; @@ -4324,6 +4356,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : 
SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 33c11fe..1836682 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // large enough that referencing from the FP won't result in things being // in range relatively often, we can use a base pointer to allow access // from the other direction like the SP normally works. + // Furthermore, if both variable sized objects are present, and the + // stack needs to be dynamically re-aligned, the base pointer is the only + // reliable way to reference the locals. if (MFI->hasVarSizedObjects()) { + if (needsStackRealignment(MF)) + return true; // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } +bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { + + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + return false; + + return true; +} + +// FIXME: share this with other backends with identical implementation? +bool +AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl(*MF.getFunction()) + ->getFrameLowering() + ->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} + unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index c01bfa5..8c379d9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -93,6 +93,9 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td index 3ec4157..ca4457a 100644 --- a/lib/Target/AArch64/AArch64SchedA57.td +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td" // Cortex-A57. The Cortex-A57 types are directly associated with resources, so // defining the aliases precludes the need for mapping them using WriteRes. The // aliases are sufficient for creating a coarse, working model. As the model -// evolves, InstRWs will be used to override these SchedAliases. +// evolves, InstRWs will be used to override some of these SchedAliases. +// +// WARNING: Using SchedAliases is convenient and works well for latency and +// resource lookup for instructions. 
However, this creates an entry in +// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking +// any SchedReadAdvance since the lookup will fail. def : SchedAlias<WriteImm, A57Write_1cyc_1I>; def : SchedAlias<WriteI, A57Write_1cyc_1I>; @@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>; def : SchedAlias<WriteIS, A57Write_1cyc_1I>; def : SchedAlias<WriteID32, A57Write_19cyc_1M>; def : SchedAlias<WriteID64, A57Write_35cyc_1M>; -def : SchedAlias<WriteIM32, A57Write_3cyc_1M>; -def : SchedAlias<WriteIM64, A57Write_5cyc_1M>; +def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; } +def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; } def : SchedAlias<WriteBr, A57Write_1cyc_1B>; def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>; def : SchedAlias<WriteLD, A57Write_4cyc_1L>; @@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; +// Shifted Register with Shift == 0 +// ---------------------------------------------------------------------------- + +def A57WriteISReg : SchedWriteVariant<[ + SchedVar<RegShiftedPred, [WriteISReg]>, + SchedVar<NoSchedPred, [WriteI]>]>; +def : InstRW<[A57WriteISReg], (instregex ".*rs$")>; + + // Divide and Multiply Instructions // ----------------------------------------------------------------------------- diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 221d70d..0b97af8 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -47,8 +47,9 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index bcab97d..5454b20 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -37,11 +37,12 @@ protected: /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily; + bool HasV8_1aOps; + bool HasFPARMv8; bool HasNEON; bool HasCrypto; bool HasCRC; - bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove; @@ -93,6 +94,8 @@ public: return isCortexA53() || isCortexA57(); } + bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } @@ -101,7 +104,6 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index f902f64..ab28a16 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -87,6 +87,11 @@ EnableGEPOpt("aarch64-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(true)); +// FIXME: Unify control over GlobalMerge. +static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("aarch64-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); @@ -245,7 +250,9 @@ bool AArch64PassConfig::addPreISel() { // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. - if (TM->getOptLevel() == CodeGenOpt::Aggressive) + if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 1219ffc..063c714 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1972,7 +1972,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); - StringRef Name = Mapper.toString(MCE->getValue(), Valid); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, S, getContext())); return MatchOperand_Success; @@ -1985,7 +1986,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); - unsigned prfop = Mapper.fromString(Tok.getString(), Valid); + unsigned prfop = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; @@ -2090,15 +2092,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Real)) { APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + if (isNegative) + RealVal.changeSign(); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); Parser.Lex(); // Eat the token. // Check for out of range values. As an exception, we let Zero through, // as we handle that special case in post-processing before matching in // order to use the zero register for it. 
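// (Editor's note, not part of the patch: the switch from isZero() to
// isPosZero() below closes a sign hole. "fmov d0, xzr" materializes the
// all-zero bit pattern, i.e. +0.0, while -0.0 is 0x8000000000000000 and is
// not encodable as an 8-bit FP immediate either, so "fmov d0, #-0.0" must
// now be rejected instead of being silently matched to the zero register as
// +0.0. The changeSign() call added above makes the parsed APFloat itself
// negative so this check can see the sign.)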
- if (Val == -1 && !RealVal.isZero()) { + if (Val == -1 && !RealVal.isPosZero()) { TokError("expected compatible register or floating-point constant"); return MatchOperand_ParseFail; } @@ -2597,7 +2600,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { } bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); - StringRef Name = Mapper.toString(MCE->getValue(), Valid); + StringRef Name = + Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, ExprLoc, getContext())); return MatchOperand_Success; @@ -2610,7 +2614,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); - unsigned Opt = Mapper.fromString(Tok.getString(), Valid); + unsigned Opt = + Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2638,18 +2643,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { return MatchOperand_NoMatch; bool IsKnown; - auto MRSMapper = AArch64SysReg::MRSMapper(STI.getFeatureBits()); - uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), IsKnown); + auto MRSMapper = AArch64SysReg::MRSMapper(); + uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); assert(IsKnown == (MRSReg != -1U) && "register should be -1 if and only if it's unknown"); - auto MSRMapper = AArch64SysReg::MSRMapper(STI.getFeatureBits()); - uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), IsKnown); + auto MSRMapper = AArch64SysReg::MSRMapper(); + uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(), + IsKnown); assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); auto PStateMapper = AArch64PState::PStateMapper(); - uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown); + uint32_t PStateField = + PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown); assert(IsKnown == (PStateField != -1U) && "register should be -1 if and only if it's unknown"); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index fb25089..1c8c0a66 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1102,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::STLRW: case AArch64::STLRB: case AArch64::STLRH: + case AArch64::STLLRW: + case AArch64::STLLRB: + case AArch64::STLLRH: + case AArch64::LDLARW: + case AArch64::LDLARB: + case AArch64::LDLARH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXRX: @@ -1112,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, case AArch64::LDAXRX: case AArch64::LDXRX: case AArch64::STLRX: + case AArch64::LDLARX: + case AArch64::STLLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXPW: @@ -1504,7 +1512,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, Inst.addOperand(MCOperand::CreateImm(crm)); bool ValidNamed; - (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); + const AArch64Disassembler *Dis = + static_cast<const AArch64Disassembler *>(Decoder); + (void)AArch64PState::PStateMapper().toString(pstate_field, + Dis->getSubtargetInfo().getFeatureBits(), ValidNamed); return 
ValidNamed ? Success : Fail; } diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 46a1d79..febd332 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -34,18 +34,13 @@ using namespace llvm; AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : AArch64InstPrinter(MAI, MII, MRI, STI) {} + const MCRegisterInfo &MRI) + : AArch64InstPrinter(MAI, MII, MRI) {} void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // This is for .cfi directives. @@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { // Check for special encodings and print the canonical alias instead. unsigned Opcode = MI->getOpcode(); @@ -210,8 +206,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); + if (!printAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } @@ -614,7 +610,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { } void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); StringRef Layout, Mnemonic; @@ -624,7 +621,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); + printVectorList(MI, ListOpNum, STI, O, ""); O << ", " << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); @@ -638,7 +635,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // Now onto the operands: first a vector list with possible lane // specifier. E.g. 
{ v0 }[2] int OpNum = LdStDesc->ListOperand; - printVectorList(MI, OpNum++, O, ""); + printVectorList(MI, OpNum++, STI, O, ""); if (LdStDesc->HasLane) O << '[' << MI->getOperand(OpNum++).getImm() << ']'; @@ -662,7 +659,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - AArch64InstPrinter::printInst(MI, O, Annot); + AArch64InstPrinter::printInst(MI, O, Annot, STI); } bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { @@ -889,6 +886,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { } void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { @@ -903,6 +901,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); O << format("#%#llx", Op.getImm()); @@ -922,6 +921,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isReg() && "Non-register vreg operand!"); @@ -930,6 +930,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); @@ -937,6 +938,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isImm()) { @@ -946,18 +948,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); O << '#' << Val; if (Shift != 0) - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); if (CommentStream) *CommentStream << '=' << (Val << Shift) << '\n'; } else { assert(MO.isExpr() && "Unexpected operand type!"); O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } } void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -965,6 +968,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint64_t Val = MI->getOperand(OpNum).getImm(); O << "#0x"; @@ -972,6 +976,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); // LSL #0 should not be printed. 
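[Editor's note] The pattern repeated through the rest of this printer file -- and in the AArch64NamedImmMapper changes further down, where every Mapping row gains a feature-bits column checked by isNameEqual/isValueEqual -- is the same: instead of the printer latching setAvailableFeatures(STI.getFeatureBits()) at construction, the MCSubtargetInfo is threaded into every print helper, so one printer instance can serve code built for different subtargets and feature-gated names (v8.1a system registers, barrier options) resolve per call. A self-contained toy of the before/after shape, with the feature bit position made up for illustration:

  #include <cstdint>

  struct SubtargetInfo { uint64_t FeatureBits = 0; };
  constexpr uint64_t FeatureV8_1a = 1ull << 0; // hypothetical bit position

  // Before the change (sketch): feature bits frozen at construction time.
  class OldPrinter {
    uint64_t AvailableFeatures;
  public:
    explicit OldPrinter(const SubtargetInfo &STI)
        : AvailableFeatures(STI.FeatureBits) {}
    const char *barrierName(bool &Valid) const {
      Valid = (AvailableFeatures & FeatureV8_1a) != 0;
      return Valid ? "v8.1a-only-name" : "";
    }
  };

  // After the change (sketch): the subtarget rides along on every call, so a
  // single printer instance can alternate between feature sets.
  class NewPrinter {
  public:
    const char *barrierName(const SubtargetInfo &STI, bool &Valid) const {
      Valid = (STI.FeatureBits & FeatureV8_1a) != 0;
      return Valid ? "v8.1a-only-name" : "";
    }
  };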
@@ -983,18 +988,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); + printShifter(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << getRegisterName(MI->getOperand(OpNum).getReg()); - printArithExtend(MI, OpNum + 1, O); + printArithExtend(MI, OpNum + 1, STI, O); } void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNum).getImm(); AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val); @@ -1038,24 +1046,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(CC); } void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm(); O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC)); } void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; } template<int Scale> void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << '#' << Scale * MI->getOperand(OpNum).getImm(); } @@ -1085,10 +1097,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid); + StringRef Name = + AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid); if (Valid) O << Name; else @@ -1096,6 +1110,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); float FPImm = @@ -1151,6 +1166,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { } void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix) { unsigned Reg = MI->getOperand(OpNum).getReg(); @@ -1193,14 +1209,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, O << " }"; } -void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); +void +AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + printVectorList(MI, OpNum, STI, O, ""); } template <unsigned NumLanes, char LaneKind> void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { std::string 
Suffix("."); if (NumLanes) @@ -1208,15 +1227,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, else Suffix += LaneKind; - printVectorList(MI, OpNum, O, Suffix); + printVectorList(MI, OpNum, STI, O, Suffix); } void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1241,6 +1262,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -1256,6 +1278,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, } void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); unsigned Opcode = MI->getOpcode(); @@ -1263,9 +1286,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, bool Valid; StringRef Name; if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, Valid); + Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(), + Valid); else - Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(), + Valid); if (Valid) O << Name; else @@ -1273,31 +1298,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MRSMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto Mapper = AArch64SysReg::MSRMapper(); + std::string Name = Mapper.toString(Val, STI.getFeatureBits()); O << StringRef(Name).upper(); } void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + StringRef Name = + AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid); if (Valid) O << StringRef(Name.str()).upper(); else @@ -1305,6 +1334,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, } void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned RawVal = MI->getOperand(OpNo).getImm(); uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 5f51621..c2077a0 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ 
b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -26,16 +26,21 @@ class MCOperand; class AArch64InstPrinter : public MCInstPrinter { public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); + virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, + raw_ostream &O); virtual StringRef getRegName(unsigned RegNo) const { return getRegisterName(RegNo); } @@ -45,90 +50,126 @@ public: protected: bool printSysAlias(const MCInst *MI, raw_ostream &O); // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, raw_ostream &O); - template<int Amount> - void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + template <int Amount> + void printPostIncOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O) { printPostIncOperand(MI, OpNo, Amount, O); } - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVRegOperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSysCROperand(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddSubImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printLogicalImm64(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShifter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printExtendedRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void 
printArithExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O, char SrcRegKind, unsigned Width); template <char SrcRegKind, unsigned Width> - void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + void printMemExtend(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printMemExtend(MI, OpNum, O, SrcRegKind, Width); } - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInverseCondCode(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAlignedLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale, raw_ostream &O); - template<int Scale> - void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int Scale> + void printUImm12Offset(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printUImm12Offset(MI, OpNum, Scale, O); } - template<int BitWidth> - void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + template <int BitWidth> + void printAMIndexedWB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { printAMIndexedWB(MI, OpNum, BitWidth / 8, O); } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAMNoIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - template<int Scale> - void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template <int Scale> + void printImmScale(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printPrefetchOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, + void printVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, StringRef LayoutSuffix); /// Print a list of vector registers where the type suffix is implicit /// (i.e. attached to the instruction rather than the registers). 
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printTypedVectorList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAdrpLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBarrierOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSystemPStateField(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { public: AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; - void printInstruction(const MCInst *MI, raw_ostream &O) override; - bool printAliasInstr(const MCInst *MI, raw_ostream &O) override; + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O) override; void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O) override; StringRef getRegName(unsigned RegNo) const override { return getRegisterName(RegNo); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 84b63a0..e5eb90c 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -313,7 +313,7 @@ public: DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI) : AArch64AsmBackend(T), MRI(MRI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, MachO::CPU_SUBTYPE_ARM64_ALL); } @@ -461,7 +461,7 @@ public: ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter 
*createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 5ea49c3..1f516d1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter { public: AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian); - virtual ~AArch64ELFObjectWriter(); + ~AArch64ELFObjectWriter() override; protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, @@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, llvm_unreachable("Unimplemented fixup -> relocation"); } -MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian) { +MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 8f780d2..540d1fc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -89,12 +89,12 @@ class AArch64ELFStreamer : public MCELFStreamer { public: friend class AArch64TargetELFStreamer; - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} - ~AArch64ELFStreamer() {} + ~AArch64ELFStreamer() override {} void ChangeSection(const MCSection *Section, const MCExpr *Subsection) override { @@ -211,8 +211,8 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, } MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll) { + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); if (RelaxAll) S->getAssembler().setRelaxAll(true); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 71b05cc..ef48203 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -19,8 +19,8 @@ namespace llvm { MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 9ea49f0..fd4dc47 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -40,7 +40,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter { public: AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {} - ~AArch64MCCodeEmitter() {} + ~AArch64MCCodeEmitter() override {} // getBinaryCodeForInstr - 
TableGen'erated function for getting the // binary encoding for an instruction. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 38b399d..afad674 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -109,29 +109,28 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, +static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new AArch64InstPrinter(MAI, MII, MRI, STI); + return new AArch64InstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) - return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + return new AArch64AppleInstPrinter(MAI, MII, MRI); return nullptr; } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &TAB, raw_ostream &OS, + MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); } static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, DWARFMustBeAtTheEnd, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 7ce303b..4705bdf 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -33,6 +33,7 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheAArch64leTarget; extern Target TheAArch64beTarget; @@ -48,11 +49,13 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian); -MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, - uint32_t CPUSubtype); +MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, + uint32_t CPUType, + uint32_t CPUSubtype); MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 0d9385d..61649c4 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -413,9 +413,9 @@ void AArch64MachObjectWriter::RecordRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { +MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS, + uint32_t CPUType, + uint32_t CPUSubtype) { return createMachObjectWriter( new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, /*IsLittleEndian=*/true); diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp 
b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 160c1c5..8696163 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -18,9 +18,10 @@ using namespace llvm; -StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, uint64_t FeatureBits, + bool &Valid) const { for (unsigned i = 0; i < NumMappings; ++i) { - if (Mappings[i].Value == Value) { + if (Mappings[i].isValueEqual(Value, FeatureBits)) { Valid = true; return Mappings[i].Name; } @@ -30,10 +31,11 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, uint64_t FeatureBits, + bool &Valid) const { std::string LowerCaseName = Name.lower(); for (unsigned i = 0; i < NumMappings; ++i) { - if (Mappings[i].Name == LowerCaseName) { + if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) { Valid = true; return Mappings[i].Value; } @@ -48,744 +50,776 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const { } const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { - {"s1e1r", S1E1R}, - {"s1e2r", S1E2R}, - {"s1e3r", S1E3R}, - {"s1e1w", S1E1W}, - {"s1e2w", S1E2W}, - {"s1e3w", S1E3W}, - {"s1e0r", S1E0R}, - {"s1e0w", S1E0W}, - {"s12e1r", S12E1R}, - {"s12e1w", S12E1W}, - {"s12e0r", S12E0R}, - {"s12e0w", S12E0W}, + {"s1e1r", S1E1R, 0}, + {"s1e2r", S1E2R, 0}, + {"s1e3r", S1E3R, 0}, + {"s1e1w", S1E1W, 0}, + {"s1e2w", S1E2W, 0}, + {"s1e3w", S1E3W, 0}, + {"s1e0r", S1E0R, 0}, + {"s1e0w", S1E0W, 0}, + {"s12e1r", S12E1R, 0}, + {"s12e1w", S12E1W, 0}, + {"s12e0r", S12E0R, 0}, + {"s12e0w", S12E0W, 0}, }; AArch64AT::ATMapper::ATMapper() : AArch64NamedImmMapper(ATMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { - {"oshld", OSHLD}, - {"oshst", OSHST}, - {"osh", OSH}, - {"nshld", NSHLD}, - {"nshst", NSHST}, - {"nsh", NSH}, - {"ishld", ISHLD}, - {"ishst", ISHST}, - {"ish", ISH}, - {"ld", LD}, - {"st", ST}, - {"sy", SY} + {"oshld", OSHLD, 0}, + {"oshst", OSHST, 0}, + {"osh", OSH, 0}, + {"nshld", NSHLD, 0}, + {"nshst", NSHST, 0}, + {"nsh", NSH, 0}, + {"ishld", ISHLD, 0}, + {"ishst", ISHST, 0}, + {"ish", ISH, 0}, + {"ld", LD, 0}, + {"st", ST, 0}, + {"sy", SY, 0} }; AArch64DB::DBarrierMapper::DBarrierMapper() : AArch64NamedImmMapper(DBarrierMappings, 16u) {} const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { - {"zva", ZVA}, - {"ivac", IVAC}, - {"isw", ISW}, - {"cvac", CVAC}, - {"csw", CSW}, - {"cvau", CVAU}, - {"civac", CIVAC}, - {"cisw", CISW} + {"zva", ZVA, 0}, + {"ivac", IVAC, 0}, + {"isw", ISW, 0}, + {"cvac", CVAC, 0}, + {"csw", CSW, 0}, + {"cvau", CVAU, 0}, + {"civac", CIVAC, 0}, + {"cisw", CISW, 0} }; AArch64DC::DCMapper::DCMapper() : AArch64NamedImmMapper(DCMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { - {"ialluis", IALLUIS}, - {"iallu", IALLU}, - {"ivau", IVAU} + {"ialluis", IALLUIS, 0}, + {"iallu", IALLU, 0}, + {"ivau", IVAU, 0} }; AArch64IC::ICMapper::ICMapper() : AArch64NamedImmMapper(ICMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { - {"sy", SY}, + {"sy", SY, 0}, }; AArch64ISB::ISBMapper::ISBMapper() : AArch64NamedImmMapper(ISBMappings, 16) {} const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { - {"pldl1keep", 
PLDL1KEEP}, - {"pldl1strm", PLDL1STRM}, - {"pldl2keep", PLDL2KEEP}, - {"pldl2strm", PLDL2STRM}, - {"pldl3keep", PLDL3KEEP}, - {"pldl3strm", PLDL3STRM}, - {"plil1keep", PLIL1KEEP}, - {"plil1strm", PLIL1STRM}, - {"plil2keep", PLIL2KEEP}, - {"plil2strm", PLIL2STRM}, - {"plil3keep", PLIL3KEEP}, - {"plil3strm", PLIL3STRM}, - {"pstl1keep", PSTL1KEEP}, - {"pstl1strm", PSTL1STRM}, - {"pstl2keep", PSTL2KEEP}, - {"pstl2strm", PSTL2STRM}, - {"pstl3keep", PSTL3KEEP}, - {"pstl3strm", PSTL3STRM} + {"pldl1keep", PLDL1KEEP, 0}, + {"pldl1strm", PLDL1STRM, 0}, + {"pldl2keep", PLDL2KEEP, 0}, + {"pldl2strm", PLDL2STRM, 0}, + {"pldl3keep", PLDL3KEEP, 0}, + {"pldl3strm", PLDL3STRM, 0}, + {"plil1keep", PLIL1KEEP, 0}, + {"plil1strm", PLIL1STRM, 0}, + {"plil2keep", PLIL2KEEP, 0}, + {"plil2strm", PLIL2STRM, 0}, + {"plil3keep", PLIL3KEEP, 0}, + {"plil3strm", PLIL3STRM, 0}, + {"pstl1keep", PSTL1KEEP, 0}, + {"pstl1strm", PSTL1STRM, 0}, + {"pstl2keep", PSTL2KEEP, 0}, + {"pstl2strm", PSTL2STRM, 0}, + {"pstl3keep", PSTL3KEEP, 0}, + {"pstl3strm", PSTL3STRM, 0} }; AArch64PRFM::PRFMMapper::PRFMMapper() : AArch64NamedImmMapper(PRFMMappings, 32) {} const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { - {"spsel", SPSel}, - {"daifset", DAIFSet}, - {"daifclr", DAIFClr} + {"spsel", SPSel, 0}, + {"daifset", DAIFSet, 0}, + {"daifclr", DAIFClr, 0}, + + // v8.1a "Privileged Access Never" extension-specific PStates + {"pan", PAN, AArch64::HasV8_1aOps}, }; AArch64PState::PStateMapper::PStateMapper() : AArch64NamedImmMapper(PStateMappings, 0) {} const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { - {"mdccsr_el0", MDCCSR_EL0}, - {"dbgdtrrx_el0", DBGDTRRX_EL0}, - {"mdrar_el1", MDRAR_EL1}, - {"oslsr_el1", OSLSR_EL1}, - {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1}, - {"pmceid0_el0", PMCEID0_EL0}, - {"pmceid1_el0", PMCEID1_EL0}, - {"midr_el1", MIDR_EL1}, - {"ccsidr_el1", CCSIDR_EL1}, - {"clidr_el1", CLIDR_EL1}, - {"ctr_el0", CTR_EL0}, - {"mpidr_el1", MPIDR_EL1}, - {"revidr_el1", REVIDR_EL1}, - {"aidr_el1", AIDR_EL1}, - {"dczid_el0", DCZID_EL0}, - {"id_pfr0_el1", ID_PFR0_EL1}, - {"id_pfr1_el1", ID_PFR1_EL1}, - {"id_dfr0_el1", ID_DFR0_EL1}, - {"id_afr0_el1", ID_AFR0_EL1}, - {"id_mmfr0_el1", ID_MMFR0_EL1}, - {"id_mmfr1_el1", ID_MMFR1_EL1}, - {"id_mmfr2_el1", ID_MMFR2_EL1}, - {"id_mmfr3_el1", ID_MMFR3_EL1}, - {"id_isar0_el1", ID_ISAR0_EL1}, - {"id_isar1_el1", ID_ISAR1_EL1}, - {"id_isar2_el1", ID_ISAR2_EL1}, - {"id_isar3_el1", ID_ISAR3_EL1}, - {"id_isar4_el1", ID_ISAR4_EL1}, - {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, - {"id_aa64afr0_el1", ID_A64AFR0_EL1}, - {"id_aa64afr1_el1", ID_A64AFR1_EL1}, - {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, - {"mvfr0_el1", MVFR0_EL1}, - {"mvfr1_el1", MVFR1_EL1}, - {"mvfr2_el1", MVFR2_EL1}, - {"rvbar_el1", RVBAR_EL1}, - {"rvbar_el2", RVBAR_EL2}, - {"rvbar_el3", RVBAR_EL3}, - {"isr_el1", ISR_EL1}, - {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0}, + {"mdccsr_el0", MDCCSR_EL0, 0}, + {"dbgdtrrx_el0", DBGDTRRX_EL0, 0}, + {"mdrar_el1", MDRAR_EL1, 0}, + {"oslsr_el1", OSLSR_EL1, 0}, + {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, 0}, + {"pmceid0_el0", PMCEID0_EL0, 0}, + {"pmceid1_el0", PMCEID1_EL0, 0}, + {"midr_el1", MIDR_EL1, 0}, + {"ccsidr_el1", CCSIDR_EL1, 0}, + {"clidr_el1", CLIDR_EL1, 0}, + 
{"ctr_el0", CTR_EL0, 0}, + {"mpidr_el1", MPIDR_EL1, 0}, + {"revidr_el1", REVIDR_EL1, 0}, + {"aidr_el1", AIDR_EL1, 0}, + {"dczid_el0", DCZID_EL0, 0}, + {"id_pfr0_el1", ID_PFR0_EL1, 0}, + {"id_pfr1_el1", ID_PFR1_EL1, 0}, + {"id_dfr0_el1", ID_DFR0_EL1, 0}, + {"id_afr0_el1", ID_AFR0_EL1, 0}, + {"id_mmfr0_el1", ID_MMFR0_EL1, 0}, + {"id_mmfr1_el1", ID_MMFR1_EL1, 0}, + {"id_mmfr2_el1", ID_MMFR2_EL1, 0}, + {"id_mmfr3_el1", ID_MMFR3_EL1, 0}, + {"id_isar0_el1", ID_ISAR0_EL1, 0}, + {"id_isar1_el1", ID_ISAR1_EL1, 0}, + {"id_isar2_el1", ID_ISAR2_EL1, 0}, + {"id_isar3_el1", ID_ISAR3_EL1, 0}, + {"id_isar4_el1", ID_ISAR4_EL1, 0}, + {"id_isar5_el1", ID_ISAR5_EL1, 0}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1, 0}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1, 0}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1, 0}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1, 0}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1, 0}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1, 0}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1, 0}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1, 0}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, 0}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, 0}, + {"mvfr0_el1", MVFR0_EL1, 0}, + {"mvfr1_el1", MVFR1_EL1, 0}, + {"mvfr2_el1", MVFR2_EL1, 0}, + {"rvbar_el1", RVBAR_EL1, 0}, + {"rvbar_el2", RVBAR_EL2, 0}, + {"rvbar_el3", RVBAR_EL3, 0}, + {"isr_el1", ISR_EL1, 0}, + {"cntpct_el0", CNTPCT_EL0, 0}, + {"cntvct_el0", CNTVCT_EL0, 0}, // Trace registers - {"trcstatr", TRCSTATR}, - {"trcidr8", TRCIDR8}, - {"trcidr9", TRCIDR9}, - {"trcidr10", TRCIDR10}, - {"trcidr11", TRCIDR11}, - {"trcidr12", TRCIDR12}, - {"trcidr13", TRCIDR13}, - {"trcidr0", TRCIDR0}, - {"trcidr1", TRCIDR1}, - {"trcidr2", TRCIDR2}, - {"trcidr3", TRCIDR3}, - {"trcidr4", TRCIDR4}, - {"trcidr5", TRCIDR5}, - {"trcidr6", TRCIDR6}, - {"trcidr7", TRCIDR7}, - {"trcoslsr", TRCOSLSR}, - {"trcpdsr", TRCPDSR}, - {"trcdevaff0", TRCDEVAFF0}, - {"trcdevaff1", TRCDEVAFF1}, - {"trclsr", TRCLSR}, - {"trcauthstatus", TRCAUTHSTATUS}, - {"trcdevarch", TRCDEVARCH}, - {"trcdevid", TRCDEVID}, - {"trcdevtype", TRCDEVTYPE}, - {"trcpidr4", TRCPIDR4}, - {"trcpidr5", TRCPIDR5}, - {"trcpidr6", TRCPIDR6}, - {"trcpidr7", TRCPIDR7}, - {"trcpidr0", TRCPIDR0}, - {"trcpidr1", TRCPIDR1}, - {"trcpidr2", TRCPIDR2}, - {"trcpidr3", TRCPIDR3}, - {"trccidr0", TRCCIDR0}, - {"trccidr1", TRCCIDR1}, - {"trccidr2", TRCCIDR2}, - {"trccidr3", TRCCIDR3}, + {"trcstatr", TRCSTATR, 0}, + {"trcidr8", TRCIDR8, 0}, + {"trcidr9", TRCIDR9, 0}, + {"trcidr10", TRCIDR10, 0}, + {"trcidr11", TRCIDR11, 0}, + {"trcidr12", TRCIDR12, 0}, + {"trcidr13", TRCIDR13, 0}, + {"trcidr0", TRCIDR0, 0}, + {"trcidr1", TRCIDR1, 0}, + {"trcidr2", TRCIDR2, 0}, + {"trcidr3", TRCIDR3, 0}, + {"trcidr4", TRCIDR4, 0}, + {"trcidr5", TRCIDR5, 0}, + {"trcidr6", TRCIDR6, 0}, + {"trcidr7", TRCIDR7, 0}, + {"trcoslsr", TRCOSLSR, 0}, + {"trcpdsr", TRCPDSR, 0}, + {"trcdevaff0", TRCDEVAFF0, 0}, + {"trcdevaff1", TRCDEVAFF1, 0}, + {"trclsr", TRCLSR, 0}, + {"trcauthstatus", TRCAUTHSTATUS, 0}, + {"trcdevarch", TRCDEVARCH, 0}, + {"trcdevid", TRCDEVID, 0}, + {"trcdevtype", TRCDEVTYPE, 0}, + {"trcpidr4", TRCPIDR4, 0}, + {"trcpidr5", TRCPIDR5, 0}, + {"trcpidr6", TRCPIDR6, 0}, + {"trcpidr7", TRCPIDR7, 0}, + {"trcpidr0", TRCPIDR0, 0}, + {"trcpidr1", TRCPIDR1, 0}, + {"trcpidr2", TRCPIDR2, 0}, + {"trcpidr3", TRCPIDR3, 0}, + {"trccidr0", TRCCIDR0, 0}, + {"trccidr1", TRCCIDR1, 0}, + {"trccidr2", TRCCIDR2, 0}, + {"trccidr3", TRCCIDR3, 0}, // GICv3 registers - {"icc_iar1_el1", ICC_IAR1_EL1}, - {"icc_iar0_el1", ICC_IAR0_EL1}, - {"icc_hppir1_el1", ICC_HPPIR1_EL1}, - {"icc_hppir0_el1", ICC_HPPIR0_EL1}, - {"icc_rpr_el1", 
ICC_RPR_EL1}, - {"ich_vtr_el2", ICH_VTR_EL2}, - {"ich_eisr_el2", ICH_EISR_EL2}, - {"ich_elsr_el2", ICH_ELSR_EL2} + {"icc_iar1_el1", ICC_IAR1_EL1, 0}, + {"icc_iar0_el1", ICC_IAR0_EL1, 0}, + {"icc_hppir1_el1", ICC_HPPIR1_EL1, 0}, + {"icc_hppir0_el1", ICC_HPPIR0_EL1, 0}, + {"icc_rpr_el1", ICC_RPR_EL1, 0}, + {"ich_vtr_el2", ICH_VTR_EL2, 0}, + {"ich_eisr_el2", ICH_EISR_EL2, 0}, + {"ich_elsr_el2", ICH_ELSR_EL2, 0} }; -AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { +AArch64SysReg::MRSMapper::MRSMapper() { InstMappings = &MRSMappings[0]; NumInstMappings = llvm::array_lengthof(MRSMappings); } const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { - {"dbgdtrtx_el0", DBGDTRTX_EL0}, - {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0}, + {"dbgdtrtx_el0", DBGDTRTX_EL0, 0}, + {"oslar_el1", OSLAR_EL1, 0}, + {"pmswinc_el0", PMSWINC_EL0, 0}, // Trace registers - {"trcoslar", TRCOSLAR}, - {"trclar", TRCLAR}, + {"trcoslar", TRCOSLAR, 0}, + {"trclar", TRCLAR, 0}, // GICv3 registers - {"icc_eoir1_el1", ICC_EOIR1_EL1}, - {"icc_eoir0_el1", ICC_EOIR0_EL1}, - {"icc_dir_el1", ICC_DIR_EL1}, - {"icc_sgi1r_el1", ICC_SGI1R_EL1}, - {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, - {"icc_sgi0r_el1", ICC_SGI0R_EL1} + {"icc_eoir1_el1", ICC_EOIR1_EL1, 0}, + {"icc_eoir0_el1", ICC_EOIR0_EL1, 0}, + {"icc_dir_el1", ICC_DIR_EL1, 0}, + {"icc_sgi1r_el1", ICC_SGI1R_EL1, 0}, + {"icc_asgi1r_el1", ICC_ASGI1R_EL1, 0}, + {"icc_sgi0r_el1", ICC_SGI0R_EL1, 0}, + + // v8.1a "Privileged Access Never" extension-specific system registers + {"pan", PAN, AArch64::HasV8_1aOps}, }; -AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { +AArch64SysReg::MSRMapper::MSRMapper() { InstMappings = &MSRMappings[0]; NumInstMappings = llvm::array_lengthof(MSRMappings); } const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { - {"osdtrrx_el1", OSDTRRX_EL1}, - {"osdtrtx_el1", OSDTRTX_EL1}, - {"teecr32_el1", TEECR32_EL1}, - {"mdccint_el1", MDCCINT_EL1}, - {"mdscr_el1", MDSCR_EL1}, - {"dbgdtr_el0", DBGDTR_EL0}, - {"oseccr_el1", OSECCR_EL1}, - {"dbgvcr32_el2", DBGVCR32_EL2}, - {"dbgbvr0_el1", DBGBVR0_EL1}, - {"dbgbvr1_el1", DBGBVR1_EL1}, - {"dbgbvr2_el1", DBGBVR2_EL1}, - {"dbgbvr3_el1", DBGBVR3_EL1}, - {"dbgbvr4_el1", DBGBVR4_EL1}, - {"dbgbvr5_el1", DBGBVR5_EL1}, - {"dbgbvr6_el1", DBGBVR6_EL1}, - {"dbgbvr7_el1", DBGBVR7_EL1}, - {"dbgbvr8_el1", DBGBVR8_EL1}, - {"dbgbvr9_el1", DBGBVR9_EL1}, - {"dbgbvr10_el1", DBGBVR10_EL1}, - {"dbgbvr11_el1", DBGBVR11_EL1}, - {"dbgbvr12_el1", DBGBVR12_EL1}, - {"dbgbvr13_el1", DBGBVR13_EL1}, - {"dbgbvr14_el1", DBGBVR14_EL1}, - {"dbgbvr15_el1", DBGBVR15_EL1}, - {"dbgbcr0_el1", DBGBCR0_EL1}, - {"dbgbcr1_el1", DBGBCR1_EL1}, - {"dbgbcr2_el1", DBGBCR2_EL1}, - {"dbgbcr3_el1", DBGBCR3_EL1}, - {"dbgbcr4_el1", DBGBCR4_EL1}, - {"dbgbcr5_el1", DBGBCR5_EL1}, - {"dbgbcr6_el1", DBGBCR6_EL1}, - {"dbgbcr7_el1", DBGBCR7_EL1}, - {"dbgbcr8_el1", DBGBCR8_EL1}, - {"dbgbcr9_el1", DBGBCR9_EL1}, - {"dbgbcr10_el1", DBGBCR10_EL1}, - {"dbgbcr11_el1", DBGBCR11_EL1}, - {"dbgbcr12_el1", DBGBCR12_EL1}, - {"dbgbcr13_el1", DBGBCR13_EL1}, - {"dbgbcr14_el1", DBGBCR14_EL1}, - {"dbgbcr15_el1", DBGBCR15_EL1}, - {"dbgwvr0_el1", DBGWVR0_EL1}, - {"dbgwvr1_el1", DBGWVR1_EL1}, - {"dbgwvr2_el1", DBGWVR2_EL1}, - {"dbgwvr3_el1", DBGWVR3_EL1}, - {"dbgwvr4_el1", DBGWVR4_EL1}, - {"dbgwvr5_el1", DBGWVR5_EL1}, - {"dbgwvr6_el1", DBGWVR6_EL1}, - {"dbgwvr7_el1", DBGWVR7_EL1}, - {"dbgwvr8_el1", DBGWVR8_EL1}, - {"dbgwvr9_el1", DBGWVR9_EL1}, - 
{"dbgwvr10_el1", DBGWVR10_EL1}, - {"dbgwvr11_el1", DBGWVR11_EL1}, - {"dbgwvr12_el1", DBGWVR12_EL1}, - {"dbgwvr13_el1", DBGWVR13_EL1}, - {"dbgwvr14_el1", DBGWVR14_EL1}, - {"dbgwvr15_el1", DBGWVR15_EL1}, - {"dbgwcr0_el1", DBGWCR0_EL1}, - {"dbgwcr1_el1", DBGWCR1_EL1}, - {"dbgwcr2_el1", DBGWCR2_EL1}, - {"dbgwcr3_el1", DBGWCR3_EL1}, - {"dbgwcr4_el1", DBGWCR4_EL1}, - {"dbgwcr5_el1", DBGWCR5_EL1}, - {"dbgwcr6_el1", DBGWCR6_EL1}, - {"dbgwcr7_el1", DBGWCR7_EL1}, - {"dbgwcr8_el1", DBGWCR8_EL1}, - {"dbgwcr9_el1", DBGWCR9_EL1}, - {"dbgwcr10_el1", DBGWCR10_EL1}, - {"dbgwcr11_el1", DBGWCR11_EL1}, - {"dbgwcr12_el1", DBGWCR12_EL1}, - {"dbgwcr13_el1", DBGWCR13_EL1}, - {"dbgwcr14_el1", DBGWCR14_EL1}, - {"dbgwcr15_el1", DBGWCR15_EL1}, - {"teehbr32_el1", TEEHBR32_EL1}, - {"osdlr_el1", OSDLR_EL1}, - {"dbgprcr_el1", DBGPRCR_EL1}, - {"dbgclaimset_el1", DBGCLAIMSET_EL1}, - {"dbgclaimclr_el1", DBGCLAIMCLR_EL1}, - {"csselr_el1", CSSELR_EL1}, - {"vpidr_el2", VPIDR_EL2}, - {"vmpidr_el2", VMPIDR_EL2}, - {"sctlr_el1", SCTLR_EL1}, - {"sctlr_el2", SCTLR_EL2}, - {"sctlr_el3", SCTLR_EL3}, - {"actlr_el1", ACTLR_EL1}, - {"actlr_el2", ACTLR_EL2}, - {"actlr_el3", ACTLR_EL3}, - {"cpacr_el1", CPACR_EL1}, - {"hcr_el2", HCR_EL2}, - {"scr_el3", SCR_EL3}, - {"mdcr_el2", MDCR_EL2}, - {"sder32_el3", SDER32_EL3}, - {"cptr_el2", CPTR_EL2}, - {"cptr_el3", CPTR_EL3}, - {"hstr_el2", HSTR_EL2}, - {"hacr_el2", HACR_EL2}, - {"mdcr_el3", MDCR_EL3}, - {"ttbr0_el1", TTBR0_EL1}, - {"ttbr0_el2", TTBR0_EL2}, - {"ttbr0_el3", TTBR0_EL3}, - {"ttbr1_el1", TTBR1_EL1}, - {"tcr_el1", TCR_EL1}, - {"tcr_el2", TCR_EL2}, - {"tcr_el3", TCR_EL3}, - {"vttbr_el2", VTTBR_EL2}, - {"vtcr_el2", VTCR_EL2}, - {"dacr32_el2", DACR32_EL2}, - {"spsr_el1", SPSR_EL1}, - {"spsr_el2", SPSR_EL2}, - {"spsr_el3", SPSR_EL3}, - {"elr_el1", ELR_EL1}, - {"elr_el2", ELR_EL2}, - {"elr_el3", ELR_EL3}, - {"sp_el0", SP_EL0}, - {"sp_el1", SP_EL1}, - {"sp_el2", SP_EL2}, - {"spsel", SPSel}, - {"nzcv", NZCV}, - {"daif", DAIF}, - {"currentel", CurrentEL}, - {"spsr_irq", SPSR_irq}, - {"spsr_abt", SPSR_abt}, - {"spsr_und", SPSR_und}, - {"spsr_fiq", SPSR_fiq}, - {"fpcr", FPCR}, - {"fpsr", FPSR}, - {"dspsr_el0", DSPSR_EL0}, - {"dlr_el0", DLR_EL0}, - {"ifsr32_el2", IFSR32_EL2}, - {"afsr0_el1", AFSR0_EL1}, - {"afsr0_el2", AFSR0_EL2}, - {"afsr0_el3", AFSR0_EL3}, - {"afsr1_el1", AFSR1_EL1}, - {"afsr1_el2", AFSR1_EL2}, - {"afsr1_el3", AFSR1_EL3}, - {"esr_el1", ESR_EL1}, - {"esr_el2", ESR_EL2}, - {"esr_el3", ESR_EL3}, - {"fpexc32_el2", FPEXC32_EL2}, - {"far_el1", FAR_EL1}, - {"far_el2", FAR_EL2}, - {"far_el3", FAR_EL3}, - {"hpfar_el2", HPFAR_EL2}, - {"par_el1", PAR_EL1}, - {"pmcr_el0", PMCR_EL0}, - {"pmcntenset_el0", PMCNTENSET_EL0}, - {"pmcntenclr_el0", PMCNTENCLR_EL0}, - {"pmovsclr_el0", PMOVSCLR_EL0}, - {"pmselr_el0", PMSELR_EL0}, - {"pmccntr_el0", PMCCNTR_EL0}, - {"pmxevtyper_el0", PMXEVTYPER_EL0}, - {"pmxevcntr_el0", PMXEVCNTR_EL0}, - {"pmuserenr_el0", PMUSERENR_EL0}, - {"pmintenset_el1", PMINTENSET_EL1}, - {"pmintenclr_el1", PMINTENCLR_EL1}, - {"pmovsset_el0", PMOVSSET_EL0}, - {"mair_el1", MAIR_EL1}, - {"mair_el2", MAIR_EL2}, - {"mair_el3", MAIR_EL3}, - {"amair_el1", AMAIR_EL1}, - {"amair_el2", AMAIR_EL2}, - {"amair_el3", AMAIR_EL3}, - {"vbar_el1", VBAR_EL1}, - {"vbar_el2", VBAR_EL2}, - {"vbar_el3", VBAR_EL3}, - {"rmr_el1", RMR_EL1}, - {"rmr_el2", RMR_EL2}, - {"rmr_el3", RMR_EL3}, - {"contextidr_el1", CONTEXTIDR_EL1}, - {"tpidr_el0", TPIDR_EL0}, - {"tpidr_el2", TPIDR_EL2}, - {"tpidr_el3", TPIDR_EL3}, - {"tpidrro_el0", TPIDRRO_EL0}, - {"tpidr_el1", TPIDR_EL1}, - {"cntfrq_el0", CNTFRQ_EL0}, - 
{"cntvoff_el2", CNTVOFF_EL2}, - {"cntkctl_el1", CNTKCTL_EL1}, - {"cnthctl_el2", CNTHCTL_EL2}, - {"cntp_tval_el0", CNTP_TVAL_EL0}, - {"cnthp_tval_el2", CNTHP_TVAL_EL2}, - {"cntps_tval_el1", CNTPS_TVAL_EL1}, - {"cntp_ctl_el0", CNTP_CTL_EL0}, - {"cnthp_ctl_el2", CNTHP_CTL_EL2}, - {"cntps_ctl_el1", CNTPS_CTL_EL1}, - {"cntp_cval_el0", CNTP_CVAL_EL0}, - {"cnthp_cval_el2", CNTHP_CVAL_EL2}, - {"cntps_cval_el1", CNTPS_CVAL_EL1}, - {"cntv_tval_el0", CNTV_TVAL_EL0}, - {"cntv_ctl_el0", CNTV_CTL_EL0}, - {"cntv_cval_el0", CNTV_CVAL_EL0}, - {"pmevcntr0_el0", PMEVCNTR0_EL0}, - {"pmevcntr1_el0", PMEVCNTR1_EL0}, - {"pmevcntr2_el0", PMEVCNTR2_EL0}, - {"pmevcntr3_el0", PMEVCNTR3_EL0}, - {"pmevcntr4_el0", PMEVCNTR4_EL0}, - {"pmevcntr5_el0", PMEVCNTR5_EL0}, - {"pmevcntr6_el0", PMEVCNTR6_EL0}, - {"pmevcntr7_el0", PMEVCNTR7_EL0}, - {"pmevcntr8_el0", PMEVCNTR8_EL0}, - {"pmevcntr9_el0", PMEVCNTR9_EL0}, - {"pmevcntr10_el0", PMEVCNTR10_EL0}, - {"pmevcntr11_el0", PMEVCNTR11_EL0}, - {"pmevcntr12_el0", PMEVCNTR12_EL0}, - {"pmevcntr13_el0", PMEVCNTR13_EL0}, - {"pmevcntr14_el0", PMEVCNTR14_EL0}, - {"pmevcntr15_el0", PMEVCNTR15_EL0}, - {"pmevcntr16_el0", PMEVCNTR16_EL0}, - {"pmevcntr17_el0", PMEVCNTR17_EL0}, - {"pmevcntr18_el0", PMEVCNTR18_EL0}, - {"pmevcntr19_el0", PMEVCNTR19_EL0}, - {"pmevcntr20_el0", PMEVCNTR20_EL0}, - {"pmevcntr21_el0", PMEVCNTR21_EL0}, - {"pmevcntr22_el0", PMEVCNTR22_EL0}, - {"pmevcntr23_el0", PMEVCNTR23_EL0}, - {"pmevcntr24_el0", PMEVCNTR24_EL0}, - {"pmevcntr25_el0", PMEVCNTR25_EL0}, - {"pmevcntr26_el0", PMEVCNTR26_EL0}, - {"pmevcntr27_el0", PMEVCNTR27_EL0}, - {"pmevcntr28_el0", PMEVCNTR28_EL0}, - {"pmevcntr29_el0", PMEVCNTR29_EL0}, - {"pmevcntr30_el0", PMEVCNTR30_EL0}, - {"pmccfiltr_el0", PMCCFILTR_EL0}, - {"pmevtyper0_el0", PMEVTYPER0_EL0}, - {"pmevtyper1_el0", PMEVTYPER1_EL0}, - {"pmevtyper2_el0", PMEVTYPER2_EL0}, - {"pmevtyper3_el0", PMEVTYPER3_EL0}, - {"pmevtyper4_el0", PMEVTYPER4_EL0}, - {"pmevtyper5_el0", PMEVTYPER5_EL0}, - {"pmevtyper6_el0", PMEVTYPER6_EL0}, - {"pmevtyper7_el0", PMEVTYPER7_EL0}, - {"pmevtyper8_el0", PMEVTYPER8_EL0}, - {"pmevtyper9_el0", PMEVTYPER9_EL0}, - {"pmevtyper10_el0", PMEVTYPER10_EL0}, - {"pmevtyper11_el0", PMEVTYPER11_EL0}, - {"pmevtyper12_el0", PMEVTYPER12_EL0}, - {"pmevtyper13_el0", PMEVTYPER13_EL0}, - {"pmevtyper14_el0", PMEVTYPER14_EL0}, - {"pmevtyper15_el0", PMEVTYPER15_EL0}, - {"pmevtyper16_el0", PMEVTYPER16_EL0}, - {"pmevtyper17_el0", PMEVTYPER17_EL0}, - {"pmevtyper18_el0", PMEVTYPER18_EL0}, - {"pmevtyper19_el0", PMEVTYPER19_EL0}, - {"pmevtyper20_el0", PMEVTYPER20_EL0}, - {"pmevtyper21_el0", PMEVTYPER21_EL0}, - {"pmevtyper22_el0", PMEVTYPER22_EL0}, - {"pmevtyper23_el0", PMEVTYPER23_EL0}, - {"pmevtyper24_el0", PMEVTYPER24_EL0}, - {"pmevtyper25_el0", PMEVTYPER25_EL0}, - {"pmevtyper26_el0", PMEVTYPER26_EL0}, - {"pmevtyper27_el0", PMEVTYPER27_EL0}, - {"pmevtyper28_el0", PMEVTYPER28_EL0}, - {"pmevtyper29_el0", PMEVTYPER29_EL0}, - {"pmevtyper30_el0", PMEVTYPER30_EL0}, + {"osdtrrx_el1", OSDTRRX_EL1, 0}, + {"osdtrtx_el1", OSDTRTX_EL1, 0}, + {"teecr32_el1", TEECR32_EL1, 0}, + {"mdccint_el1", MDCCINT_EL1, 0}, + {"mdscr_el1", MDSCR_EL1, 0}, + {"dbgdtr_el0", DBGDTR_EL0, 0}, + {"oseccr_el1", OSECCR_EL1, 0}, + {"dbgvcr32_el2", DBGVCR32_EL2, 0}, + {"dbgbvr0_el1", DBGBVR0_EL1, 0}, + {"dbgbvr1_el1", DBGBVR1_EL1, 0}, + {"dbgbvr2_el1", DBGBVR2_EL1, 0}, + {"dbgbvr3_el1", DBGBVR3_EL1, 0}, + {"dbgbvr4_el1", DBGBVR4_EL1, 0}, + {"dbgbvr5_el1", DBGBVR5_EL1, 0}, + {"dbgbvr6_el1", DBGBVR6_EL1, 0}, + {"dbgbvr7_el1", DBGBVR7_EL1, 0}, + {"dbgbvr8_el1", DBGBVR8_EL1, 0}, + {"dbgbvr9_el1", 
DBGBVR9_EL1, 0}, + {"dbgbvr10_el1", DBGBVR10_EL1, 0}, + {"dbgbvr11_el1", DBGBVR11_EL1, 0}, + {"dbgbvr12_el1", DBGBVR12_EL1, 0}, + {"dbgbvr13_el1", DBGBVR13_EL1, 0}, + {"dbgbvr14_el1", DBGBVR14_EL1, 0}, + {"dbgbvr15_el1", DBGBVR15_EL1, 0}, + {"dbgbcr0_el1", DBGBCR0_EL1, 0}, + {"dbgbcr1_el1", DBGBCR1_EL1, 0}, + {"dbgbcr2_el1", DBGBCR2_EL1, 0}, + {"dbgbcr3_el1", DBGBCR3_EL1, 0}, + {"dbgbcr4_el1", DBGBCR4_EL1, 0}, + {"dbgbcr5_el1", DBGBCR5_EL1, 0}, + {"dbgbcr6_el1", DBGBCR6_EL1, 0}, + {"dbgbcr7_el1", DBGBCR7_EL1, 0}, + {"dbgbcr8_el1", DBGBCR8_EL1, 0}, + {"dbgbcr9_el1", DBGBCR9_EL1, 0}, + {"dbgbcr10_el1", DBGBCR10_EL1, 0}, + {"dbgbcr11_el1", DBGBCR11_EL1, 0}, + {"dbgbcr12_el1", DBGBCR12_EL1, 0}, + {"dbgbcr13_el1", DBGBCR13_EL1, 0}, + {"dbgbcr14_el1", DBGBCR14_EL1, 0}, + {"dbgbcr15_el1", DBGBCR15_EL1, 0}, + {"dbgwvr0_el1", DBGWVR0_EL1, 0}, + {"dbgwvr1_el1", DBGWVR1_EL1, 0}, + {"dbgwvr2_el1", DBGWVR2_EL1, 0}, + {"dbgwvr3_el1", DBGWVR3_EL1, 0}, + {"dbgwvr4_el1", DBGWVR4_EL1, 0}, + {"dbgwvr5_el1", DBGWVR5_EL1, 0}, + {"dbgwvr6_el1", DBGWVR6_EL1, 0}, + {"dbgwvr7_el1", DBGWVR7_EL1, 0}, + {"dbgwvr8_el1", DBGWVR8_EL1, 0}, + {"dbgwvr9_el1", DBGWVR9_EL1, 0}, + {"dbgwvr10_el1", DBGWVR10_EL1, 0}, + {"dbgwvr11_el1", DBGWVR11_EL1, 0}, + {"dbgwvr12_el1", DBGWVR12_EL1, 0}, + {"dbgwvr13_el1", DBGWVR13_EL1, 0}, + {"dbgwvr14_el1", DBGWVR14_EL1, 0}, + {"dbgwvr15_el1", DBGWVR15_EL1, 0}, + {"dbgwcr0_el1", DBGWCR0_EL1, 0}, + {"dbgwcr1_el1", DBGWCR1_EL1, 0}, + {"dbgwcr2_el1", DBGWCR2_EL1, 0}, + {"dbgwcr3_el1", DBGWCR3_EL1, 0}, + {"dbgwcr4_el1", DBGWCR4_EL1, 0}, + {"dbgwcr5_el1", DBGWCR5_EL1, 0}, + {"dbgwcr6_el1", DBGWCR6_EL1, 0}, + {"dbgwcr7_el1", DBGWCR7_EL1, 0}, + {"dbgwcr8_el1", DBGWCR8_EL1, 0}, + {"dbgwcr9_el1", DBGWCR9_EL1, 0}, + {"dbgwcr10_el1", DBGWCR10_EL1, 0}, + {"dbgwcr11_el1", DBGWCR11_EL1, 0}, + {"dbgwcr12_el1", DBGWCR12_EL1, 0}, + {"dbgwcr13_el1", DBGWCR13_EL1, 0}, + {"dbgwcr14_el1", DBGWCR14_EL1, 0}, + {"dbgwcr15_el1", DBGWCR15_EL1, 0}, + {"teehbr32_el1", TEEHBR32_EL1, 0}, + {"osdlr_el1", OSDLR_EL1, 0}, + {"dbgprcr_el1", DBGPRCR_EL1, 0}, + {"dbgclaimset_el1", DBGCLAIMSET_EL1, 0}, + {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, 0}, + {"csselr_el1", CSSELR_EL1, 0}, + {"vpidr_el2", VPIDR_EL2, 0}, + {"vmpidr_el2", VMPIDR_EL2, 0}, + {"sctlr_el1", SCTLR_EL1, 0}, + {"sctlr_el2", SCTLR_EL2, 0}, + {"sctlr_el3", SCTLR_EL3, 0}, + {"actlr_el1", ACTLR_EL1, 0}, + {"actlr_el2", ACTLR_EL2, 0}, + {"actlr_el3", ACTLR_EL3, 0}, + {"cpacr_el1", CPACR_EL1, 0}, + {"hcr_el2", HCR_EL2, 0}, + {"scr_el3", SCR_EL3, 0}, + {"mdcr_el2", MDCR_EL2, 0}, + {"sder32_el3", SDER32_EL3, 0}, + {"cptr_el2", CPTR_EL2, 0}, + {"cptr_el3", CPTR_EL3, 0}, + {"hstr_el2", HSTR_EL2, 0}, + {"hacr_el2", HACR_EL2, 0}, + {"mdcr_el3", MDCR_EL3, 0}, + {"ttbr0_el1", TTBR0_EL1, 0}, + {"ttbr0_el2", TTBR0_EL2, 0}, + {"ttbr0_el3", TTBR0_EL3, 0}, + {"ttbr1_el1", TTBR1_EL1, 0}, + {"tcr_el1", TCR_EL1, 0}, + {"tcr_el2", TCR_EL2, 0}, + {"tcr_el3", TCR_EL3, 0}, + {"vttbr_el2", VTTBR_EL2, 0}, + {"vtcr_el2", VTCR_EL2, 0}, + {"dacr32_el2", DACR32_EL2, 0}, + {"spsr_el1", SPSR_EL1, 0}, + {"spsr_el2", SPSR_EL2, 0}, + {"spsr_el3", SPSR_EL3, 0}, + {"elr_el1", ELR_EL1, 0}, + {"elr_el2", ELR_EL2, 0}, + {"elr_el3", ELR_EL3, 0}, + {"sp_el0", SP_EL0, 0}, + {"sp_el1", SP_EL1, 0}, + {"sp_el2", SP_EL2, 0}, + {"spsel", SPSel, 0}, + {"nzcv", NZCV, 0}, + {"daif", DAIF, 0}, + {"currentel", CurrentEL, 0}, + {"spsr_irq", SPSR_irq, 0}, + {"spsr_abt", SPSR_abt, 0}, + {"spsr_und", SPSR_und, 0}, + {"spsr_fiq", SPSR_fiq, 0}, + {"fpcr", FPCR, 0}, + {"fpsr", FPSR, 0}, + {"dspsr_el0", DSPSR_EL0, 0}, + 
{"dlr_el0", DLR_EL0, 0}, + {"ifsr32_el2", IFSR32_EL2, 0}, + {"afsr0_el1", AFSR0_EL1, 0}, + {"afsr0_el2", AFSR0_EL2, 0}, + {"afsr0_el3", AFSR0_EL3, 0}, + {"afsr1_el1", AFSR1_EL1, 0}, + {"afsr1_el2", AFSR1_EL2, 0}, + {"afsr1_el3", AFSR1_EL3, 0}, + {"esr_el1", ESR_EL1, 0}, + {"esr_el2", ESR_EL2, 0}, + {"esr_el3", ESR_EL3, 0}, + {"fpexc32_el2", FPEXC32_EL2, 0}, + {"far_el1", FAR_EL1, 0}, + {"far_el2", FAR_EL2, 0}, + {"far_el3", FAR_EL3, 0}, + {"hpfar_el2", HPFAR_EL2, 0}, + {"par_el1", PAR_EL1, 0}, + {"pmcr_el0", PMCR_EL0, 0}, + {"pmcntenset_el0", PMCNTENSET_EL0, 0}, + {"pmcntenclr_el0", PMCNTENCLR_EL0, 0}, + {"pmovsclr_el0", PMOVSCLR_EL0, 0}, + {"pmselr_el0", PMSELR_EL0, 0}, + {"pmccntr_el0", PMCCNTR_EL0, 0}, + {"pmxevtyper_el0", PMXEVTYPER_EL0, 0}, + {"pmxevcntr_el0", PMXEVCNTR_EL0, 0}, + {"pmuserenr_el0", PMUSERENR_EL0, 0}, + {"pmintenset_el1", PMINTENSET_EL1, 0}, + {"pmintenclr_el1", PMINTENCLR_EL1, 0}, + {"pmovsset_el0", PMOVSSET_EL0, 0}, + {"mair_el1", MAIR_EL1, 0}, + {"mair_el2", MAIR_EL2, 0}, + {"mair_el3", MAIR_EL3, 0}, + {"amair_el1", AMAIR_EL1, 0}, + {"amair_el2", AMAIR_EL2, 0}, + {"amair_el3", AMAIR_EL3, 0}, + {"vbar_el1", VBAR_EL1, 0}, + {"vbar_el2", VBAR_EL2, 0}, + {"vbar_el3", VBAR_EL3, 0}, + {"rmr_el1", RMR_EL1, 0}, + {"rmr_el2", RMR_EL2, 0}, + {"rmr_el3", RMR_EL3, 0}, + {"contextidr_el1", CONTEXTIDR_EL1, 0}, + {"tpidr_el0", TPIDR_EL0, 0}, + {"tpidr_el2", TPIDR_EL2, 0}, + {"tpidr_el3", TPIDR_EL3, 0}, + {"tpidrro_el0", TPIDRRO_EL0, 0}, + {"tpidr_el1", TPIDR_EL1, 0}, + {"cntfrq_el0", CNTFRQ_EL0, 0}, + {"cntvoff_el2", CNTVOFF_EL2, 0}, + {"cntkctl_el1", CNTKCTL_EL1, 0}, + {"cnthctl_el2", CNTHCTL_EL2, 0}, + {"cntp_tval_el0", CNTP_TVAL_EL0, 0}, + {"cnthp_tval_el2", CNTHP_TVAL_EL2, 0}, + {"cntps_tval_el1", CNTPS_TVAL_EL1, 0}, + {"cntp_ctl_el0", CNTP_CTL_EL0, 0}, + {"cnthp_ctl_el2", CNTHP_CTL_EL2, 0}, + {"cntps_ctl_el1", CNTPS_CTL_EL1, 0}, + {"cntp_cval_el0", CNTP_CVAL_EL0, 0}, + {"cnthp_cval_el2", CNTHP_CVAL_EL2, 0}, + {"cntps_cval_el1", CNTPS_CVAL_EL1, 0}, + {"cntv_tval_el0", CNTV_TVAL_EL0, 0}, + {"cntv_ctl_el0", CNTV_CTL_EL0, 0}, + {"cntv_cval_el0", CNTV_CVAL_EL0, 0}, + {"pmevcntr0_el0", PMEVCNTR0_EL0, 0}, + {"pmevcntr1_el0", PMEVCNTR1_EL0, 0}, + {"pmevcntr2_el0", PMEVCNTR2_EL0, 0}, + {"pmevcntr3_el0", PMEVCNTR3_EL0, 0}, + {"pmevcntr4_el0", PMEVCNTR4_EL0, 0}, + {"pmevcntr5_el0", PMEVCNTR5_EL0, 0}, + {"pmevcntr6_el0", PMEVCNTR6_EL0, 0}, + {"pmevcntr7_el0", PMEVCNTR7_EL0, 0}, + {"pmevcntr8_el0", PMEVCNTR8_EL0, 0}, + {"pmevcntr9_el0", PMEVCNTR9_EL0, 0}, + {"pmevcntr10_el0", PMEVCNTR10_EL0, 0}, + {"pmevcntr11_el0", PMEVCNTR11_EL0, 0}, + {"pmevcntr12_el0", PMEVCNTR12_EL0, 0}, + {"pmevcntr13_el0", PMEVCNTR13_EL0, 0}, + {"pmevcntr14_el0", PMEVCNTR14_EL0, 0}, + {"pmevcntr15_el0", PMEVCNTR15_EL0, 0}, + {"pmevcntr16_el0", PMEVCNTR16_EL0, 0}, + {"pmevcntr17_el0", PMEVCNTR17_EL0, 0}, + {"pmevcntr18_el0", PMEVCNTR18_EL0, 0}, + {"pmevcntr19_el0", PMEVCNTR19_EL0, 0}, + {"pmevcntr20_el0", PMEVCNTR20_EL0, 0}, + {"pmevcntr21_el0", PMEVCNTR21_EL0, 0}, + {"pmevcntr22_el0", PMEVCNTR22_EL0, 0}, + {"pmevcntr23_el0", PMEVCNTR23_EL0, 0}, + {"pmevcntr24_el0", PMEVCNTR24_EL0, 0}, + {"pmevcntr25_el0", PMEVCNTR25_EL0, 0}, + {"pmevcntr26_el0", PMEVCNTR26_EL0, 0}, + {"pmevcntr27_el0", PMEVCNTR27_EL0, 0}, + {"pmevcntr28_el0", PMEVCNTR28_EL0, 0}, + {"pmevcntr29_el0", PMEVCNTR29_EL0, 0}, + {"pmevcntr30_el0", PMEVCNTR30_EL0, 0}, + {"pmccfiltr_el0", PMCCFILTR_EL0, 0}, + {"pmevtyper0_el0", PMEVTYPER0_EL0, 0}, + {"pmevtyper1_el0", PMEVTYPER1_EL0, 0}, + {"pmevtyper2_el0", PMEVTYPER2_EL0, 0}, + {"pmevtyper3_el0", 
PMEVTYPER3_EL0, 0}, + {"pmevtyper4_el0", PMEVTYPER4_EL0, 0}, + {"pmevtyper5_el0", PMEVTYPER5_EL0, 0}, + {"pmevtyper6_el0", PMEVTYPER6_EL0, 0}, + {"pmevtyper7_el0", PMEVTYPER7_EL0, 0}, + {"pmevtyper8_el0", PMEVTYPER8_EL0, 0}, + {"pmevtyper9_el0", PMEVTYPER9_EL0, 0}, + {"pmevtyper10_el0", PMEVTYPER10_EL0, 0}, + {"pmevtyper11_el0", PMEVTYPER11_EL0, 0}, + {"pmevtyper12_el0", PMEVTYPER12_EL0, 0}, + {"pmevtyper13_el0", PMEVTYPER13_EL0, 0}, + {"pmevtyper14_el0", PMEVTYPER14_EL0, 0}, + {"pmevtyper15_el0", PMEVTYPER15_EL0, 0}, + {"pmevtyper16_el0", PMEVTYPER16_EL0, 0}, + {"pmevtyper17_el0", PMEVTYPER17_EL0, 0}, + {"pmevtyper18_el0", PMEVTYPER18_EL0, 0}, + {"pmevtyper19_el0", PMEVTYPER19_EL0, 0}, + {"pmevtyper20_el0", PMEVTYPER20_EL0, 0}, + {"pmevtyper21_el0", PMEVTYPER21_EL0, 0}, + {"pmevtyper22_el0", PMEVTYPER22_EL0, 0}, + {"pmevtyper23_el0", PMEVTYPER23_EL0, 0}, + {"pmevtyper24_el0", PMEVTYPER24_EL0, 0}, + {"pmevtyper25_el0", PMEVTYPER25_EL0, 0}, + {"pmevtyper26_el0", PMEVTYPER26_EL0, 0}, + {"pmevtyper27_el0", PMEVTYPER27_EL0, 0}, + {"pmevtyper28_el0", PMEVTYPER28_EL0, 0}, + {"pmevtyper29_el0", PMEVTYPER29_EL0, 0}, + {"pmevtyper30_el0", PMEVTYPER30_EL0, 0}, // Trace registers - {"trcprgctlr", TRCPRGCTLR}, - {"trcprocselr", TRCPROCSELR}, - {"trcconfigr", TRCCONFIGR}, - {"trcauxctlr", TRCAUXCTLR}, - {"trceventctl0r", TRCEVENTCTL0R}, - {"trceventctl1r", TRCEVENTCTL1R}, - {"trcstallctlr", TRCSTALLCTLR}, - {"trctsctlr", TRCTSCTLR}, - {"trcsyncpr", TRCSYNCPR}, - {"trcccctlr", TRCCCCTLR}, - {"trcbbctlr", TRCBBCTLR}, - {"trctraceidr", TRCTRACEIDR}, - {"trcqctlr", TRCQCTLR}, - {"trcvictlr", TRCVICTLR}, - {"trcviiectlr", TRCVIIECTLR}, - {"trcvissctlr", TRCVISSCTLR}, - {"trcvipcssctlr", TRCVIPCSSCTLR}, - {"trcvdctlr", TRCVDCTLR}, - {"trcvdsacctlr", TRCVDSACCTLR}, - {"trcvdarcctlr", TRCVDARCCTLR}, - {"trcseqevr0", TRCSEQEVR0}, - {"trcseqevr1", TRCSEQEVR1}, - {"trcseqevr2", TRCSEQEVR2}, - {"trcseqrstevr", TRCSEQRSTEVR}, - {"trcseqstr", TRCSEQSTR}, - {"trcextinselr", TRCEXTINSELR}, - {"trccntrldvr0", TRCCNTRLDVR0}, - {"trccntrldvr1", TRCCNTRLDVR1}, - {"trccntrldvr2", TRCCNTRLDVR2}, - {"trccntrldvr3", TRCCNTRLDVR3}, - {"trccntctlr0", TRCCNTCTLR0}, - {"trccntctlr1", TRCCNTCTLR1}, - {"trccntctlr2", TRCCNTCTLR2}, - {"trccntctlr3", TRCCNTCTLR3}, - {"trccntvr0", TRCCNTVR0}, - {"trccntvr1", TRCCNTVR1}, - {"trccntvr2", TRCCNTVR2}, - {"trccntvr3", TRCCNTVR3}, - {"trcimspec0", TRCIMSPEC0}, - {"trcimspec1", TRCIMSPEC1}, - {"trcimspec2", TRCIMSPEC2}, - {"trcimspec3", TRCIMSPEC3}, - {"trcimspec4", TRCIMSPEC4}, - {"trcimspec5", TRCIMSPEC5}, - {"trcimspec6", TRCIMSPEC6}, - {"trcimspec7", TRCIMSPEC7}, - {"trcrsctlr2", TRCRSCTLR2}, - {"trcrsctlr3", TRCRSCTLR3}, - {"trcrsctlr4", TRCRSCTLR4}, - {"trcrsctlr5", TRCRSCTLR5}, - {"trcrsctlr6", TRCRSCTLR6}, - {"trcrsctlr7", TRCRSCTLR7}, - {"trcrsctlr8", TRCRSCTLR8}, - {"trcrsctlr9", TRCRSCTLR9}, - {"trcrsctlr10", TRCRSCTLR10}, - {"trcrsctlr11", TRCRSCTLR11}, - {"trcrsctlr12", TRCRSCTLR12}, - {"trcrsctlr13", TRCRSCTLR13}, - {"trcrsctlr14", TRCRSCTLR14}, - {"trcrsctlr15", TRCRSCTLR15}, - {"trcrsctlr16", TRCRSCTLR16}, - {"trcrsctlr17", TRCRSCTLR17}, - {"trcrsctlr18", TRCRSCTLR18}, - {"trcrsctlr19", TRCRSCTLR19}, - {"trcrsctlr20", TRCRSCTLR20}, - {"trcrsctlr21", TRCRSCTLR21}, - {"trcrsctlr22", TRCRSCTLR22}, - {"trcrsctlr23", TRCRSCTLR23}, - {"trcrsctlr24", TRCRSCTLR24}, - {"trcrsctlr25", TRCRSCTLR25}, - {"trcrsctlr26", TRCRSCTLR26}, - {"trcrsctlr27", TRCRSCTLR27}, - {"trcrsctlr28", TRCRSCTLR28}, - {"trcrsctlr29", TRCRSCTLR29}, - {"trcrsctlr30", TRCRSCTLR30}, - {"trcrsctlr31", 
TRCRSCTLR31}, - {"trcssccr0", TRCSSCCR0}, - {"trcssccr1", TRCSSCCR1}, - {"trcssccr2", TRCSSCCR2}, - {"trcssccr3", TRCSSCCR3}, - {"trcssccr4", TRCSSCCR4}, - {"trcssccr5", TRCSSCCR5}, - {"trcssccr6", TRCSSCCR6}, - {"trcssccr7", TRCSSCCR7}, - {"trcsscsr0", TRCSSCSR0}, - {"trcsscsr1", TRCSSCSR1}, - {"trcsscsr2", TRCSSCSR2}, - {"trcsscsr3", TRCSSCSR3}, - {"trcsscsr4", TRCSSCSR4}, - {"trcsscsr5", TRCSSCSR5}, - {"trcsscsr6", TRCSSCSR6}, - {"trcsscsr7", TRCSSCSR7}, - {"trcsspcicr0", TRCSSPCICR0}, - {"trcsspcicr1", TRCSSPCICR1}, - {"trcsspcicr2", TRCSSPCICR2}, - {"trcsspcicr3", TRCSSPCICR3}, - {"trcsspcicr4", TRCSSPCICR4}, - {"trcsspcicr5", TRCSSPCICR5}, - {"trcsspcicr6", TRCSSPCICR6}, - {"trcsspcicr7", TRCSSPCICR7}, - {"trcpdcr", TRCPDCR}, - {"trcacvr0", TRCACVR0}, - {"trcacvr1", TRCACVR1}, - {"trcacvr2", TRCACVR2}, - {"trcacvr3", TRCACVR3}, - {"trcacvr4", TRCACVR4}, - {"trcacvr5", TRCACVR5}, - {"trcacvr6", TRCACVR6}, - {"trcacvr7", TRCACVR7}, - {"trcacvr8", TRCACVR8}, - {"trcacvr9", TRCACVR9}, - {"trcacvr10", TRCACVR10}, - {"trcacvr11", TRCACVR11}, - {"trcacvr12", TRCACVR12}, - {"trcacvr13", TRCACVR13}, - {"trcacvr14", TRCACVR14}, - {"trcacvr15", TRCACVR15}, - {"trcacatr0", TRCACATR0}, - {"trcacatr1", TRCACATR1}, - {"trcacatr2", TRCACATR2}, - {"trcacatr3", TRCACATR3}, - {"trcacatr4", TRCACATR4}, - {"trcacatr5", TRCACATR5}, - {"trcacatr6", TRCACATR6}, - {"trcacatr7", TRCACATR7}, - {"trcacatr8", TRCACATR8}, - {"trcacatr9", TRCACATR9}, - {"trcacatr10", TRCACATR10}, - {"trcacatr11", TRCACATR11}, - {"trcacatr12", TRCACATR12}, - {"trcacatr13", TRCACATR13}, - {"trcacatr14", TRCACATR14}, - {"trcacatr15", TRCACATR15}, - {"trcdvcvr0", TRCDVCVR0}, - {"trcdvcvr1", TRCDVCVR1}, - {"trcdvcvr2", TRCDVCVR2}, - {"trcdvcvr3", TRCDVCVR3}, - {"trcdvcvr4", TRCDVCVR4}, - {"trcdvcvr5", TRCDVCVR5}, - {"trcdvcvr6", TRCDVCVR6}, - {"trcdvcvr7", TRCDVCVR7}, - {"trcdvcmr0", TRCDVCMR0}, - {"trcdvcmr1", TRCDVCMR1}, - {"trcdvcmr2", TRCDVCMR2}, - {"trcdvcmr3", TRCDVCMR3}, - {"trcdvcmr4", TRCDVCMR4}, - {"trcdvcmr5", TRCDVCMR5}, - {"trcdvcmr6", TRCDVCMR6}, - {"trcdvcmr7", TRCDVCMR7}, - {"trccidcvr0", TRCCIDCVR0}, - {"trccidcvr1", TRCCIDCVR1}, - {"trccidcvr2", TRCCIDCVR2}, - {"trccidcvr3", TRCCIDCVR3}, - {"trccidcvr4", TRCCIDCVR4}, - {"trccidcvr5", TRCCIDCVR5}, - {"trccidcvr6", TRCCIDCVR6}, - {"trccidcvr7", TRCCIDCVR7}, - {"trcvmidcvr0", TRCVMIDCVR0}, - {"trcvmidcvr1", TRCVMIDCVR1}, - {"trcvmidcvr2", TRCVMIDCVR2}, - {"trcvmidcvr3", TRCVMIDCVR3}, - {"trcvmidcvr4", TRCVMIDCVR4}, - {"trcvmidcvr5", TRCVMIDCVR5}, - {"trcvmidcvr6", TRCVMIDCVR6}, - {"trcvmidcvr7", TRCVMIDCVR7}, - {"trccidcctlr0", TRCCIDCCTLR0}, - {"trccidcctlr1", TRCCIDCCTLR1}, - {"trcvmidcctlr0", TRCVMIDCCTLR0}, - {"trcvmidcctlr1", TRCVMIDCCTLR1}, - {"trcitctrl", TRCITCTRL}, - {"trcclaimset", TRCCLAIMSET}, - {"trcclaimclr", TRCCLAIMCLR}, + {"trcprgctlr", TRCPRGCTLR, 0}, + {"trcprocselr", TRCPROCSELR, 0}, + {"trcconfigr", TRCCONFIGR, 0}, + {"trcauxctlr", TRCAUXCTLR, 0}, + {"trceventctl0r", TRCEVENTCTL0R, 0}, + {"trceventctl1r", TRCEVENTCTL1R, 0}, + {"trcstallctlr", TRCSTALLCTLR, 0}, + {"trctsctlr", TRCTSCTLR, 0}, + {"trcsyncpr", TRCSYNCPR, 0}, + {"trcccctlr", TRCCCCTLR, 0}, + {"trcbbctlr", TRCBBCTLR, 0}, + {"trctraceidr", TRCTRACEIDR, 0}, + {"trcqctlr", TRCQCTLR, 0}, + {"trcvictlr", TRCVICTLR, 0}, + {"trcviiectlr", TRCVIIECTLR, 0}, + {"trcvissctlr", TRCVISSCTLR, 0}, + {"trcvipcssctlr", TRCVIPCSSCTLR, 0}, + {"trcvdctlr", TRCVDCTLR, 0}, + {"trcvdsacctlr", TRCVDSACCTLR, 0}, + {"trcvdarcctlr", TRCVDARCCTLR, 0}, + {"trcseqevr0", TRCSEQEVR0, 0}, + {"trcseqevr1", TRCSEQEVR1, 0}, 
+ {"trcseqevr2", TRCSEQEVR2, 0}, + {"trcseqrstevr", TRCSEQRSTEVR, 0}, + {"trcseqstr", TRCSEQSTR, 0}, + {"trcextinselr", TRCEXTINSELR, 0}, + {"trccntrldvr0", TRCCNTRLDVR0, 0}, + {"trccntrldvr1", TRCCNTRLDVR1, 0}, + {"trccntrldvr2", TRCCNTRLDVR2, 0}, + {"trccntrldvr3", TRCCNTRLDVR3, 0}, + {"trccntctlr0", TRCCNTCTLR0, 0}, + {"trccntctlr1", TRCCNTCTLR1, 0}, + {"trccntctlr2", TRCCNTCTLR2, 0}, + {"trccntctlr3", TRCCNTCTLR3, 0}, + {"trccntvr0", TRCCNTVR0, 0}, + {"trccntvr1", TRCCNTVR1, 0}, + {"trccntvr2", TRCCNTVR2, 0}, + {"trccntvr3", TRCCNTVR3, 0}, + {"trcimspec0", TRCIMSPEC0, 0}, + {"trcimspec1", TRCIMSPEC1, 0}, + {"trcimspec2", TRCIMSPEC2, 0}, + {"trcimspec3", TRCIMSPEC3, 0}, + {"trcimspec4", TRCIMSPEC4, 0}, + {"trcimspec5", TRCIMSPEC5, 0}, + {"trcimspec6", TRCIMSPEC6, 0}, + {"trcimspec7", TRCIMSPEC7, 0}, + {"trcrsctlr2", TRCRSCTLR2, 0}, + {"trcrsctlr3", TRCRSCTLR3, 0}, + {"trcrsctlr4", TRCRSCTLR4, 0}, + {"trcrsctlr5", TRCRSCTLR5, 0}, + {"trcrsctlr6", TRCRSCTLR6, 0}, + {"trcrsctlr7", TRCRSCTLR7, 0}, + {"trcrsctlr8", TRCRSCTLR8, 0}, + {"trcrsctlr9", TRCRSCTLR9, 0}, + {"trcrsctlr10", TRCRSCTLR10, 0}, + {"trcrsctlr11", TRCRSCTLR11, 0}, + {"trcrsctlr12", TRCRSCTLR12, 0}, + {"trcrsctlr13", TRCRSCTLR13, 0}, + {"trcrsctlr14", TRCRSCTLR14, 0}, + {"trcrsctlr15", TRCRSCTLR15, 0}, + {"trcrsctlr16", TRCRSCTLR16, 0}, + {"trcrsctlr17", TRCRSCTLR17, 0}, + {"trcrsctlr18", TRCRSCTLR18, 0}, + {"trcrsctlr19", TRCRSCTLR19, 0}, + {"trcrsctlr20", TRCRSCTLR20, 0}, + {"trcrsctlr21", TRCRSCTLR21, 0}, + {"trcrsctlr22", TRCRSCTLR22, 0}, + {"trcrsctlr23", TRCRSCTLR23, 0}, + {"trcrsctlr24", TRCRSCTLR24, 0}, + {"trcrsctlr25", TRCRSCTLR25, 0}, + {"trcrsctlr26", TRCRSCTLR26, 0}, + {"trcrsctlr27", TRCRSCTLR27, 0}, + {"trcrsctlr28", TRCRSCTLR28, 0}, + {"trcrsctlr29", TRCRSCTLR29, 0}, + {"trcrsctlr30", TRCRSCTLR30, 0}, + {"trcrsctlr31", TRCRSCTLR31, 0}, + {"trcssccr0", TRCSSCCR0, 0}, + {"trcssccr1", TRCSSCCR1, 0}, + {"trcssccr2", TRCSSCCR2, 0}, + {"trcssccr3", TRCSSCCR3, 0}, + {"trcssccr4", TRCSSCCR4, 0}, + {"trcssccr5", TRCSSCCR5, 0}, + {"trcssccr6", TRCSSCCR6, 0}, + {"trcssccr7", TRCSSCCR7, 0}, + {"trcsscsr0", TRCSSCSR0, 0}, + {"trcsscsr1", TRCSSCSR1, 0}, + {"trcsscsr2", TRCSSCSR2, 0}, + {"trcsscsr3", TRCSSCSR3, 0}, + {"trcsscsr4", TRCSSCSR4, 0}, + {"trcsscsr5", TRCSSCSR5, 0}, + {"trcsscsr6", TRCSSCSR6, 0}, + {"trcsscsr7", TRCSSCSR7, 0}, + {"trcsspcicr0", TRCSSPCICR0, 0}, + {"trcsspcicr1", TRCSSPCICR1, 0}, + {"trcsspcicr2", TRCSSPCICR2, 0}, + {"trcsspcicr3", TRCSSPCICR3, 0}, + {"trcsspcicr4", TRCSSPCICR4, 0}, + {"trcsspcicr5", TRCSSPCICR5, 0}, + {"trcsspcicr6", TRCSSPCICR6, 0}, + {"trcsspcicr7", TRCSSPCICR7, 0}, + {"trcpdcr", TRCPDCR, 0}, + {"trcacvr0", TRCACVR0, 0}, + {"trcacvr1", TRCACVR1, 0}, + {"trcacvr2", TRCACVR2, 0}, + {"trcacvr3", TRCACVR3, 0}, + {"trcacvr4", TRCACVR4, 0}, + {"trcacvr5", TRCACVR5, 0}, + {"trcacvr6", TRCACVR6, 0}, + {"trcacvr7", TRCACVR7, 0}, + {"trcacvr8", TRCACVR8, 0}, + {"trcacvr9", TRCACVR9, 0}, + {"trcacvr10", TRCACVR10, 0}, + {"trcacvr11", TRCACVR11, 0}, + {"trcacvr12", TRCACVR12, 0}, + {"trcacvr13", TRCACVR13, 0}, + {"trcacvr14", TRCACVR14, 0}, + {"trcacvr15", TRCACVR15, 0}, + {"trcacatr0", TRCACATR0, 0}, + {"trcacatr1", TRCACATR1, 0}, + {"trcacatr2", TRCACATR2, 0}, + {"trcacatr3", TRCACATR3, 0}, + {"trcacatr4", TRCACATR4, 0}, + {"trcacatr5", TRCACATR5, 0}, + {"trcacatr6", TRCACATR6, 0}, + {"trcacatr7", TRCACATR7, 0}, + {"trcacatr8", TRCACATR8, 0}, + {"trcacatr9", TRCACATR9, 0}, + {"trcacatr10", TRCACATR10, 0}, + {"trcacatr11", TRCACATR11, 0}, + {"trcacatr12", TRCACATR12, 0}, + {"trcacatr13", 
TRCACATR13, 0}, + {"trcacatr14", TRCACATR14, 0}, + {"trcacatr15", TRCACATR15, 0}, + {"trcdvcvr0", TRCDVCVR0, 0}, + {"trcdvcvr1", TRCDVCVR1, 0}, + {"trcdvcvr2", TRCDVCVR2, 0}, + {"trcdvcvr3", TRCDVCVR3, 0}, + {"trcdvcvr4", TRCDVCVR4, 0}, + {"trcdvcvr5", TRCDVCVR5, 0}, + {"trcdvcvr6", TRCDVCVR6, 0}, + {"trcdvcvr7", TRCDVCVR7, 0}, + {"trcdvcmr0", TRCDVCMR0, 0}, + {"trcdvcmr1", TRCDVCMR1, 0}, + {"trcdvcmr2", TRCDVCMR2, 0}, + {"trcdvcmr3", TRCDVCMR3, 0}, + {"trcdvcmr4", TRCDVCMR4, 0}, + {"trcdvcmr5", TRCDVCMR5, 0}, + {"trcdvcmr6", TRCDVCMR6, 0}, + {"trcdvcmr7", TRCDVCMR7, 0}, + {"trccidcvr0", TRCCIDCVR0, 0}, + {"trccidcvr1", TRCCIDCVR1, 0}, + {"trccidcvr2", TRCCIDCVR2, 0}, + {"trccidcvr3", TRCCIDCVR3, 0}, + {"trccidcvr4", TRCCIDCVR4, 0}, + {"trccidcvr5", TRCCIDCVR5, 0}, + {"trccidcvr6", TRCCIDCVR6, 0}, + {"trccidcvr7", TRCCIDCVR7, 0}, + {"trcvmidcvr0", TRCVMIDCVR0, 0}, + {"trcvmidcvr1", TRCVMIDCVR1, 0}, + {"trcvmidcvr2", TRCVMIDCVR2, 0}, + {"trcvmidcvr3", TRCVMIDCVR3, 0}, + {"trcvmidcvr4", TRCVMIDCVR4, 0}, + {"trcvmidcvr5", TRCVMIDCVR5, 0}, + {"trcvmidcvr6", TRCVMIDCVR6, 0}, + {"trcvmidcvr7", TRCVMIDCVR7, 0}, + {"trccidcctlr0", TRCCIDCCTLR0, 0}, + {"trccidcctlr1", TRCCIDCCTLR1, 0}, + {"trcvmidcctlr0", TRCVMIDCCTLR0, 0}, + {"trcvmidcctlr1", TRCVMIDCCTLR1, 0}, + {"trcitctrl", TRCITCTRL, 0}, + {"trcclaimset", TRCCLAIMSET, 0}, + {"trcclaimclr", TRCCLAIMCLR, 0}, // GICv3 registers - {"icc_bpr1_el1", ICC_BPR1_EL1}, - {"icc_bpr0_el1", ICC_BPR0_EL1}, - {"icc_pmr_el1", ICC_PMR_EL1}, - {"icc_ctlr_el1", ICC_CTLR_EL1}, - {"icc_ctlr_el3", ICC_CTLR_EL3}, - {"icc_sre_el1", ICC_SRE_EL1}, - {"icc_sre_el2", ICC_SRE_EL2}, - {"icc_sre_el3", ICC_SRE_EL3}, - {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, - {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, - {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, - {"icc_seien_el1", ICC_SEIEN_EL1}, - {"icc_ap0r0_el1", ICC_AP0R0_EL1}, - {"icc_ap0r1_el1", ICC_AP0R1_EL1}, - {"icc_ap0r2_el1", ICC_AP0R2_EL1}, - {"icc_ap0r3_el1", ICC_AP0R3_EL1}, - {"icc_ap1r0_el1", ICC_AP1R0_EL1}, - {"icc_ap1r1_el1", ICC_AP1R1_EL1}, - {"icc_ap1r2_el1", ICC_AP1R2_EL1}, - {"icc_ap1r3_el1", ICC_AP1R3_EL1}, - {"ich_ap0r0_el2", ICH_AP0R0_EL2}, - {"ich_ap0r1_el2", ICH_AP0R1_EL2}, - {"ich_ap0r2_el2", ICH_AP0R2_EL2}, - {"ich_ap0r3_el2", ICH_AP0R3_EL2}, - {"ich_ap1r0_el2", ICH_AP1R0_EL2}, - {"ich_ap1r1_el2", ICH_AP1R1_EL2}, - {"ich_ap1r2_el2", ICH_AP1R2_EL2}, - {"ich_ap1r3_el2", ICH_AP1R3_EL2}, - {"ich_hcr_el2", ICH_HCR_EL2}, - {"ich_misr_el2", ICH_MISR_EL2}, - {"ich_vmcr_el2", ICH_VMCR_EL2}, - {"ich_vseir_el2", ICH_VSEIR_EL2}, - {"ich_lr0_el2", ICH_LR0_EL2}, - {"ich_lr1_el2", ICH_LR1_EL2}, - {"ich_lr2_el2", ICH_LR2_EL2}, - {"ich_lr3_el2", ICH_LR3_EL2}, - {"ich_lr4_el2", ICH_LR4_EL2}, - {"ich_lr5_el2", ICH_LR5_EL2}, - {"ich_lr6_el2", ICH_LR6_EL2}, - {"ich_lr7_el2", ICH_LR7_EL2}, - {"ich_lr8_el2", ICH_LR8_EL2}, - {"ich_lr9_el2", ICH_LR9_EL2}, - {"ich_lr10_el2", ICH_LR10_EL2}, - {"ich_lr11_el2", ICH_LR11_EL2}, - {"ich_lr12_el2", ICH_LR12_EL2}, - {"ich_lr13_el2", ICH_LR13_EL2}, - {"ich_lr14_el2", ICH_LR14_EL2}, - {"ich_lr15_el2", ICH_LR15_EL2} -}; - -const AArch64NamedImmMapper::Mapping -AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = { - {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} + {"icc_bpr1_el1", ICC_BPR1_EL1, 0}, + {"icc_bpr0_el1", ICC_BPR0_EL1, 0}, + {"icc_pmr_el1", ICC_PMR_EL1, 0}, + {"icc_ctlr_el1", ICC_CTLR_EL1, 0}, + {"icc_ctlr_el3", ICC_CTLR_EL3, 0}, + {"icc_sre_el1", ICC_SRE_EL1, 0}, + {"icc_sre_el2", ICC_SRE_EL2, 0}, + {"icc_sre_el3", ICC_SRE_EL3, 0}, + {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, 0}, + {"icc_igrpen1_el1", 
ICC_IGRPEN1_EL1, 0}, + {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, 0}, + {"icc_seien_el1", ICC_SEIEN_EL1, 0}, + {"icc_ap0r0_el1", ICC_AP0R0_EL1, 0}, + {"icc_ap0r1_el1", ICC_AP0R1_EL1, 0}, + {"icc_ap0r2_el1", ICC_AP0R2_EL1, 0}, + {"icc_ap0r3_el1", ICC_AP0R3_EL1, 0}, + {"icc_ap1r0_el1", ICC_AP1R0_EL1, 0}, + {"icc_ap1r1_el1", ICC_AP1R1_EL1, 0}, + {"icc_ap1r2_el1", ICC_AP1R2_EL1, 0}, + {"icc_ap1r3_el1", ICC_AP1R3_EL1, 0}, + {"ich_ap0r0_el2", ICH_AP0R0_EL2, 0}, + {"ich_ap0r1_el2", ICH_AP0R1_EL2, 0}, + {"ich_ap0r2_el2", ICH_AP0R2_EL2, 0}, + {"ich_ap0r3_el2", ICH_AP0R3_EL2, 0}, + {"ich_ap1r0_el2", ICH_AP1R0_EL2, 0}, + {"ich_ap1r1_el2", ICH_AP1R1_EL2, 0}, + {"ich_ap1r2_el2", ICH_AP1R2_EL2, 0}, + {"ich_ap1r3_el2", ICH_AP1R3_EL2, 0}, + {"ich_hcr_el2", ICH_HCR_EL2, 0}, + {"ich_misr_el2", ICH_MISR_EL2, 0}, + {"ich_vmcr_el2", ICH_VMCR_EL2, 0}, + {"ich_vseir_el2", ICH_VSEIR_EL2, 0}, + {"ich_lr0_el2", ICH_LR0_EL2, 0}, + {"ich_lr1_el2", ICH_LR1_EL2, 0}, + {"ich_lr2_el2", ICH_LR2_EL2, 0}, + {"ich_lr3_el2", ICH_LR3_EL2, 0}, + {"ich_lr4_el2", ICH_LR4_EL2, 0}, + {"ich_lr5_el2", ICH_LR5_EL2, 0}, + {"ich_lr6_el2", ICH_LR6_EL2, 0}, + {"ich_lr7_el2", ICH_LR7_EL2, 0}, + {"ich_lr8_el2", ICH_LR8_EL2, 0}, + {"ich_lr9_el2", ICH_LR9_EL2, 0}, + {"ich_lr10_el2", ICH_LR10_EL2, 0}, + {"ich_lr11_el2", ICH_LR11_EL2, 0}, + {"ich_lr12_el2", ICH_LR12_EL2, 0}, + {"ich_lr13_el2", ICH_LR13_EL2, 0}, + {"ich_lr14_el2", ICH_LR14_EL2, 0}, + {"ich_lr15_el2", ICH_LR15_EL2, 0}, + + // Cyclone registers + {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, AArch64::ProcCyclone}, + + // v8.1a "Privileged Access Never" extension-specific system registers + {"pan", PAN, AArch64::HasV8_1aOps}, + + // v8.1a "Limited Ordering Regions" extension-specific system registers + {"lorsa_el1", LORSA_EL1, AArch64::HasV8_1aOps}, + {"lorea_el1", LOREA_EL1, AArch64::HasV8_1aOps}, + {"lorn_el1", LORN_EL1, AArch64::HasV8_1aOps}, + {"lorc_el1", LORC_EL1, AArch64::HasV8_1aOps}, + {"lorid_el1", LORID_EL1, AArch64::HasV8_1aOps}, + + // v8.1a "Virtualization host extensions" system registers + {"ttbr1_el2", TTBR1_EL2, AArch64::HasV8_1aOps}, + {"contextidr_el2", CONTEXTIDR_EL2, AArch64::HasV8_1aOps}, + {"cnthv_tval_el2", CNTHV_TVAL_EL2, AArch64::HasV8_1aOps}, + {"cnthv_cval_el2", CNTHV_CVAL_EL2, AArch64::HasV8_1aOps}, + {"cnthv_ctl_el2", CNTHV_CTL_EL2, AArch64::HasV8_1aOps}, + {"sctlr_el12", SCTLR_EL12, AArch64::HasV8_1aOps}, + {"cpacr_el12", CPACR_EL12, AArch64::HasV8_1aOps}, + {"ttbr0_el12", TTBR0_EL12, AArch64::HasV8_1aOps}, + {"ttbr1_el12", TTBR1_EL12, AArch64::HasV8_1aOps}, + {"tcr_el12", TCR_EL12, AArch64::HasV8_1aOps}, + {"afsr0_el12", AFSR0_EL12, AArch64::HasV8_1aOps}, + {"afsr1_el12", AFSR1_EL12, AArch64::HasV8_1aOps}, + {"esr_el12", ESR_EL12, AArch64::HasV8_1aOps}, + {"far_el12", FAR_EL12, AArch64::HasV8_1aOps}, + {"mair_el12", MAIR_EL12, AArch64::HasV8_1aOps}, + {"amair_el12", AMAIR_EL12, AArch64::HasV8_1aOps}, + {"vbar_el12", VBAR_EL12, AArch64::HasV8_1aOps}, + {"contextidr_el12", CONTEXTIDR_EL12, AArch64::HasV8_1aOps}, + {"cntkctl_el12", CNTKCTL_EL12, AArch64::HasV8_1aOps}, + {"cntp_tval_el02", CNTP_TVAL_EL02, AArch64::HasV8_1aOps}, + {"cntp_ctl_el02", CNTP_CTL_EL02, AArch64::HasV8_1aOps}, + {"cntp_cval_el02", CNTP_CVAL_EL02, AArch64::HasV8_1aOps}, + {"cntv_tval_el02", CNTV_TVAL_EL02, AArch64::HasV8_1aOps}, + {"cntv_ctl_el02", CNTV_CTL_EL02, AArch64::HasV8_1aOps}, + {"cntv_cval_el02", CNTV_CVAL_EL02, AArch64::HasV8_1aOps}, + {"spsr_el12", SPSR_EL12, AArch64::HasV8_1aOps}, + {"elr_el12", ELR_EL12, AArch64::HasV8_1aOps}, }; uint32_t 
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { +AArch64SysReg::SysRegMapper::fromString(StringRef Name, uint64_t FeatureBits, + bool &Valid) const { std::string NameLower = Name.lower(); // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { - if (SysRegMappings[i].Name == NameLower) { + if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) { Valid = true; return SysRegMappings[i].Value; } } - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { - if (CycloneSysRegMappings[i].Name == NameLower) { - Valid = true; - return CycloneSysRegMappings[i].Value; - } - } - } - // Now try the instruction-specific registers (either read-only or // write-only). for (unsigned i = 0; i < NumInstMappings; ++i) { - if (InstMappings[i].Name == NameLower) { + if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) { Valid = true; return InstMappings[i].Value; } @@ -814,27 +848,18 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } std::string -AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, uint64_t FeatureBits) const { // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { - if (SysRegMappings[i].Value == Bits) { + if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) { return SysRegMappings[i].Name; } } - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) { - if (CycloneSysRegMappings[i].Value == Bits) { - return CycloneSysRegMappings[i].Name; - } - } - } - // Now try the instruction-specific registers (either read-only or // write-only). 
for (unsigned i = 0; i < NumInstMappings; ++i) { - if (InstMappings[i].Value == Bits) { + if (InstMappings[i].isValueEqual(Bits, FeatureBits)) { return InstMappings[i].Name; } } @@ -851,38 +876,38 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { } const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { - {"ipas2e1is", IPAS2E1IS}, - {"ipas2le1is", IPAS2LE1IS}, - {"vmalle1is", VMALLE1IS}, - {"alle2is", ALLE2IS}, - {"alle3is", ALLE3IS}, - {"vae1is", VAE1IS}, - {"vae2is", VAE2IS}, - {"vae3is", VAE3IS}, - {"aside1is", ASIDE1IS}, - {"vaae1is", VAAE1IS}, - {"alle1is", ALLE1IS}, - {"vale1is", VALE1IS}, - {"vale2is", VALE2IS}, - {"vale3is", VALE3IS}, - {"vmalls12e1is", VMALLS12E1IS}, - {"vaale1is", VAALE1IS}, - {"ipas2e1", IPAS2E1}, - {"ipas2le1", IPAS2LE1}, - {"vmalle1", VMALLE1}, - {"alle2", ALLE2}, - {"alle3", ALLE3}, - {"vae1", VAE1}, - {"vae2", VAE2}, - {"vae3", VAE3}, - {"aside1", ASIDE1}, - {"vaae1", VAAE1}, - {"alle1", ALLE1}, - {"vale1", VALE1}, - {"vale2", VALE2}, - {"vale3", VALE3}, - {"vmalls12e1", VMALLS12E1}, - {"vaale1", VAALE1} + {"ipas2e1is", IPAS2E1IS, 0}, + {"ipas2le1is", IPAS2LE1IS, 0}, + {"vmalle1is", VMALLE1IS, 0}, + {"alle2is", ALLE2IS, 0}, + {"alle3is", ALLE3IS, 0}, + {"vae1is", VAE1IS, 0}, + {"vae2is", VAE2IS, 0}, + {"vae3is", VAE3IS, 0}, + {"aside1is", ASIDE1IS, 0}, + {"vaae1is", VAAE1IS, 0}, + {"alle1is", ALLE1IS, 0}, + {"vale1is", VALE1IS, 0}, + {"vale2is", VALE2IS, 0}, + {"vale3is", VALE3IS, 0}, + {"vmalls12e1is", VMALLS12E1IS, 0}, + {"vaale1is", VAALE1IS, 0}, + {"ipas2e1", IPAS2E1, 0}, + {"ipas2le1", IPAS2LE1, 0}, + {"vmalle1", VMALLE1, 0}, + {"alle2", ALLE2, 0}, + {"alle3", ALLE3, 0}, + {"vae1", VAE1, 0}, + {"vae2", VAE2, 0}, + {"vae3", VAE3, 0}, + {"aside1", ASIDE1, 0}, + {"vaae1", VAAE1, 0}, + {"alle1", ALLE1, 0}, + {"vale1", VALE1, 0}, + {"vale2", VALE2, 0}, + {"vale3", VALE3, 0}, + {"vmalls12e1", VMALLS12E1, 0}, + {"vaale1", VAALE1, 0} }; AArch64TLBI::TLBIMapper::TLBIMapper() diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 2ae6f52..659ea90 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -280,14 +280,26 @@ struct AArch64NamedImmMapper { struct Mapping { const char *Name; uint32_t Value; + uint64_t AvailableForFeatures; + // empty AvailableForFeatures means "always-on" + bool isNameEqual(std::string Other, uint64_t FeatureBits=~0ULL) const { + if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits)) + return false; + return Name == Other; + } + bool isValueEqual(uint32_t Other, uint64_t FeatureBits=~0ULL) const { + if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits)) + return false; + return Value == Other; + } }; template<int N> AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm) : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {} - StringRef toString(uint32_t Value, bool &Valid) const; - uint32_t fromString(StringRef Name, bool &Valid) const; + StringRef toString(uint32_t Value, uint64_t FeatureBits, bool &Valid) const; + uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const; /// Many of the instructions allow an alternative assembly form consisting of /// a simple immediate. 
Currently the only valid forms are ranges [0, N) where @@ -435,7 +447,10 @@ namespace AArch64PState { Invalid = -1, SPSel = 0x05, DAIFSet = 0x1e, - DAIFClr = 0x1f + DAIFClr = 0x1f, + + // v8.1a "Privileged Access Never" extension-specific PStates + PAN = 0x04, }; struct PStateMapper : AArch64NamedImmMapper { @@ -1122,11 +1137,48 @@ namespace AArch64SysReg { ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111 - }; - // Cyclone specific system registers - enum CycloneSysRegValues { - CPM_IOACC_CTL_EL3 = 0xff90 + // v8.1a "Privileged Access Never" extension-specific system registers + PAN = 0xc213, // 11 000 0100 0010 011 + + // v8.1a "Limited Ordering Regions" extension-specific system registers + LORSA_EL1 = 0xc520, // 11 000 1010 0100 000 + LOREA_EL1 = 0xc521, // 11 000 1010 0100 001 + LORN_EL1 = 0xc522, // 11 000 1010 0100 010 + LORC_EL1 = 0xc523, // 11 000 1010 0100 011 + LORID_EL1 = 0xc527, // 11 000 1010 0100 111 + + // v8.1a "Virtualization host extensions" system registers + TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001 + CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001 + CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000 + CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010 + CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001 + SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000 + CPACR_EL12 = 0xe882, // 11 101 0001 0000 010 + TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000 + TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001 + TCR_EL12 = 0xe902, // 11 101 0010 0000 010 + AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000 + AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001 + ESR_EL12 = 0xea90, // 11 101 0101 0010 000 + FAR_EL12 = 0xeb00, // 11 101 0110 0000 000 + MAIR_EL12 = 0xed10, // 11 101 1010 0010 000 + AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000 + VBAR_EL12 = 0xee00, // 11 101 1100 0000 000 + CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001 + CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000 + CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000 + CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001 + CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010 + CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000 + CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001 + CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010 + SPSR_EL12 = 0xea00, // 11 101 0100 0000 000 + ELR_EL12 = 0xea01, // 11 101 0100 0000 001 + + // Cyclone specific system registers + CPM_IOACC_CTL_EL3 = 0xff90, }; // Note that these do not inherit from AArch64NamedImmMapper. This class is @@ -1135,25 +1187,23 @@ namespace AArch64SysReg { // this one case. 
struct SysRegMapper { static const AArch64NamedImmMapper::Mapping SysRegMappings[]; - static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[]; const AArch64NamedImmMapper::Mapping *InstMappings; size_t NumInstMappings; - uint64_t FeatureBits; - SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } - uint32_t fromString(StringRef Name, bool &Valid) const; - std::string toString(uint32_t Bits) const; + SysRegMapper() { } + uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const; + std::string toString(uint32_t Bits, uint64_t FeatureBits) const; }; struct MSRMapper : SysRegMapper { static const AArch64NamedImmMapper::Mapping MSRMappings[]; - MSRMapper(uint64_t FeatureBits); + MSRMapper(); }; struct MRSMapper : SysRegMapper { static const AArch64NamedImmMapper::Mapping MRSMappings[]; - MRSMapper(uint64_t FeatureBits); + MRSMapper(); }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ce0aed9..bd1c7af 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -180,7 +180,7 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; -def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", +def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [HasV8Ops, FeatureAClass, FeatureCRC]>; @@ -260,6 +260,14 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC]>; +def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", + "Cortex-R4 ARM processors", + [FeatureHWDiv, + FeatureAvoidPartialCPSR, + FeatureDSPThumb2, FeatureT2XtPk, + HasV7Ops, FeatureDB, FeatureHasRAS, + FeatureRClass]>; + def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", [FeatureSlowFPBrcc, @@ -396,6 +404,16 @@ def : ProcessorModel<"krait", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: R4 has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r4", CortexA8Model, + [ProcR4]>; + +// FIXME: R4F has currently the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r4f", CortexA8Model, + [ProcR4, + FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>; + // FIXME: R5 has currently the same ProcessorModel as A8. 
def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, @@ -457,14 +475,6 @@ def : ProcessorModel<"cyclone", SwiftModel, FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; -// V8.1 Processors -def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2, - FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto]>; - //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -485,7 +495,15 @@ def ARMInstrInfo : InstrInfo; // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def ARMAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; + bit isMCAsmWriter = 1; +} + def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; + let AssemblyWriters = [ARMAsmWriter]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 102def1..1a2acf5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -207,7 +207,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name); } @@ -216,7 +216,7 @@ MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); - return OutContext.GetOrCreateSymbol(Name.str()); + return OutContext.GetOrCreateSymbol(Name); } bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, @@ -520,28 +520,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - - // Emit a .data.rel section containing any stubs that were created. - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (auto &stub: Stubs) { - OutStreamer.EmitLabel(stub.first); - OutStreamer.EmitSymbolValue(stub.second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } } //===----------------------------------------------------------------------===// @@ -597,7 +575,7 @@ void ARMAsmPrinter::emitAttributes() { std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -661,8 +639,8 @@ void ARMAsmPrinter::emitAttributes() { // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1aOps() ? 
ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a8c7657..3f79a9b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -245,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, // This register should preferably be even (Odd == 0) or odd (Odd == 1). // Check if the other part of the pair has already been assigned, and provide // the paired register as the first hint. + unsigned Paired = Hint.second; + if (Paired == 0) + return; + unsigned PairedPhys = 0; - if (VRM && VRM->hasPhys(Hint.second)) { - PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this); - if (PairedPhys && MRI.isReserved(PairedPhys)) - PairedPhys = 0; + if (TargetRegisterInfo::isPhysicalRegister(Paired)) { + PairedPhys = Paired; + } else if (VRM && VRM->hasPhys(Paired)) { + PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this); } // First prefer the paired physreg. @@ -284,9 +288,14 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, // change. unsigned OtherReg = Hint.second; Hint = MRI->getRegAllocationHint(OtherReg); - if (Hint.second == Reg) - // Make sure the pair has not already divorced. + // Make sure the pair has not already divorced. + if (Hint.second == Reg) { MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg); + if (TargetRegisterInfo::isVirtualRegister(NewReg)) + MRI->setRegAllocationHint(NewReg, + Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven + : ARMRI::RegPairOdd, OtherReg); + } } } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 13bef54..36f63e2 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -86,7 +86,7 @@ protected: } public: - virtual ~ARMConstantPoolValue(); + ~ARMConstantPoolValue() override; ARMCP::ARMCPModifier getModifier() const { return Modifier; } const char *getModifierText() const; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 830953b..9d2b09b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -311,6 +311,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { return; StackAdjustingInsts DefCFAOffsetCandidates; + bool HasFP = hasFP(MF); // Allocate the vararg register save area. if (ArgRegsSaveSize) { @@ -327,6 +328,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes - ArgRegsSaveSize, true); } + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); return; } @@ -375,7 +377,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Determine starting offsets of spill areas. - bool HasFP = hasFP(MF); unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; unsigned DPRAlign = DPRCSSize ? 
std::min(8U, Align) : 4U; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3b1b8dd..72afd2c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - // Try to generate VSEL on ARMv8. + // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { - // We can select VMAXNM/VMINNM from a compare followed by a select with the + // We can use VMAXNM/VMINNM for a compare followed by a select with the // same operands, as follows: - // c = fcmp [ogt, olt, ugt, ult] a, b + // c = fcmp [?gt, ?ge, ?lt, ?le] a, b // select c, a, b - // We only do this in unsafe-fp-math, because signed zeros and NaNs are - // handled differently than the original code sequence. + // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. + // We only do this transformation in UnsafeFPMath and for no-NaNs + // comparisons, because signed zeros and NaNs are handled differently than + // the original code sequence. + // FIXME: There are more cases that can be transformed even with NaNs, + // signed zeroes and safe math. E.g. in the following, the result will be + // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too. + // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0. + // FIXME: There is similar code that allows some extensions in + // AArch64TargetLowering::LowerSELECT_CC that should be shared with this + // code. if (getTargetMachine().Options.UnsafeFPMath) { if (LHS == TrueVal && RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } else if (LHS == FalseVal && RHS == TrueVal) { - if (CC == ISD::SETOLT || CC == ISD::SETULT) + if (CC == ISD::SETLT || CC == ISD::SETLE) return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOGT || CC == ISD::SETUGT) + if (CC == ISD::SETGT || CC == ISD::SETGE) return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c3984ca..52f3555 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -208,6 +208,8 @@ def HasV8 : Predicate<"Subtarget->hasV8Ops()">, AssemblerPredicate<"HasV8Ops", "armv8">; def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, + AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -226,8 +228,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; -def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, - AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -388,6 
+388,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // Immediate operands with a shared generic asm render method. class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } +// Operands that are part of a memory addressing mode. +class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; } + // Branch target. // FIXME: rename brtarget to t2_brtarget def brtarget : Operand<OtherVT> { @@ -790,7 +793,7 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], // addrmode_imm12 := reg +/- imm12 // def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; } -class AddrMode_Imm12 : Operand<i32>, +class AddrMode_Imm12 : MemOperand, ComplexPattern<i32, 2, "SelectAddrModeImm12", []> { // 12-bit immediate operand. Note that instructions using this encode // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other @@ -813,7 +816,7 @@ def addrmode_imm12_pre : AddrMode_Imm12 { // ldst_so_reg := reg +/- reg shop imm // def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; } -def ldst_so_reg : Operand<i32>, +def ldst_so_reg : MemOperand, ComplexPattern<i32, 3, "SelectLdStSOReg", []> { let EncoderMethod = "getLdStSORegOpValue"; // FIXME: Simplify the printer @@ -829,7 +832,7 @@ def ldst_so_reg : Operand<i32>, // {8} 1 if imm8 is non-negative. 0 otherwise. // {7-0} [0,255] imm8 value. def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; } -def postidx_imm8 : Operand<i32> { +def postidx_imm8 : MemOperand { let PrintMethod = "printPostIdxImm8Operand"; let ParserMatchClass = PostIdxImm8AsmOperand; let MIOperandInfo = (ops i32imm); @@ -841,7 +844,7 @@ def postidx_imm8 : Operand<i32> { // {8} 1 if imm8 is non-negative. 0 otherwise. // {7-0} [0,255] imm8 value, scaled by 4. def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; } -def postidx_imm8s4 : Operand<i32> { +def postidx_imm8s4 : MemOperand { let PrintMethod = "printPostIdxImm8s4Operand"; let ParserMatchClass = PostIdxImm8s4AsmOperand; let MIOperandInfo = (ops i32imm); @@ -854,7 +857,7 @@ def PostIdxRegAsmOperand : AsmOperandClass { let Name = "PostIdxReg"; let ParserMethod = "parsePostIdxReg"; } -def postidx_reg : Operand<i32> { +def postidx_reg : MemOperand { let EncoderMethod = "getPostIdxRegOpValue"; let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; @@ -869,7 +872,7 @@ def postidx_reg : Operand<i32> { // FIXME: addrmode2 should be refactored the rest of the way to always // use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } -def addrmode2 : Operand<i32>, +def addrmode2 : MemOperand, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; @@ -881,7 +884,7 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass { let Name = "PostIdxRegShifted"; let ParserMethod = "parsePostIdxReg"; } -def am2offset_reg : Operand<i32>, +def am2offset_reg : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; @@ -894,7 +897,7 @@ def am2offset_reg : Operand<i32>, // FIXME: am2offset_imm should only need the immediate, not the GPR. Having // the GPR is purely vestigial at this point.
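The ARMISelLowering.cpp hunk earlier in this file set (LowerSELECT_CC) is worth restating, since its rewritten comment encodes a small decision table: with select c, a, b taking its operands in the same order as c = fcmp a, b, a greater-than style comparison keeps the larger operand (VMAXNM) and a less-than style one the smaller (VMINNM), and each choice flips when the select operands arrive swapped. A compact sketch of that table, with hypothetical enums standing in for the ISD/ARMISD values:

enum CondCode { SETGT, SETGE, SETLT, SETLE, SETOther };
enum MaxMinOp { VMAXNM, VMINNM, NoMaxMin };

// Models: c = fcmp CC a, b; select c, TrueVal, FalseVal.
// Swapped == false means (TrueVal, FalseVal) == (a, b); true means (b, a).
static MaxMinOp chooseMaxMin(CondCode CC, bool Swapped) {
  bool IsGT = (CC == SETGT || CC == SETGE);
  bool IsLT = (CC == SETLT || CC == SETLE);
  if (!IsGT && !IsLT)
    return NoMaxMin;                  // only plain gt/ge/lt/le qualify
  if (IsGT)
    return Swapped ? VMINNM : VMAXNM; // select keeps the larger operand
  return Swapped ? VMAXNM : VMINNM;   // select keeps the smaller operand
}

The UnsafeFPMath guard in the real code exists because this table is only value-correct when NaNs and signed zeros may be ignored, exactly as the FIXME in that hunk spells out.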
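The other recurring edit in these TableGen files, starting with the new MemOperand class in the hunk above and continuing through the defs below, is purely declarative: MemOperand sets OperandType to "OPERAND_MEMORY", so generic MC-layer code can recognize a memory operand by its class instead of enumerating every target addressing mode by name. On the consumer side that reduces to a check of roughly this shape (a sketch with a hypothetical stand-in for MCOI::OperandType, not the real MC structures):

enum OperandType { OPERAND_UNKNOWN, OPERAND_IMMEDIATE, OPERAND_REGISTER,
                   OPERAND_MEMORY };

struct OperandInfo {
  OperandType Type; // filled in from the TableGen OperandType string
};

// Generic code can now ask "is this a memory reference?" without knowing
// about addrmode2, t2addrmode_imm8, or any other target-specific operand.
static bool isMemoryOperand(const OperandInfo &Info) {
  return Info.Type == OPERAND_MEMORY;
}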
def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; } -def am2offset_imm : Operand<i32>, +def am2offset_imm : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode2OffsetOpValue"; @@ -909,7 +912,7 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -class AddrMode3 : Operand<i32>, +class AddrMode3 : MemOperand, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let ParserMatchClass = AddrMode3AsmOperand; @@ -932,7 +935,7 @@ def AM3OffsetAsmOperand : AsmOperandClass { let Name = "AM3Offset"; let ParserMethod = "parseAM3Offset"; } -def am3offset : Operand<i32>, +def am3offset : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode3Offset", [], [SDNPWantRoot]> { let EncoderMethod = "getAddrMode3OffsetOpValue"; @@ -951,7 +954,7 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> { // addrmode5 := reg +/- imm8*4 // def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; } -class AddrMode5 : Operand<i32>, +class AddrMode5 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode5", []> { let EncoderMethod = "getAddrMode5OpValue"; let DecoderMethod = "DecodeAddrMode5Operand"; @@ -970,7 +973,7 @@ def addrmode5_pre : AddrMode5 { // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } -def addrmode6 : Operand<i32>, +def addrmode6 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); @@ -979,7 +982,7 @@ def addrmode6 : Operand<i32>, let ParserMatchClass = AddrMode6AsmOperand; } -def am6offset : Operand<i32>, +def am6offset : MemOperand, ComplexPattern<i32, 1, "SelectAddrMode6Offset", [], [SDNPWantRoot]> { let PrintMethod = "printAddrMode6OffsetOperand"; @@ -990,7 +993,7 @@ def am6offset : Operand<i32>, // Special version of addrmode6 to handle alignment encoding for VST1/VLD1 // (single element from one lane) for size 32. -def addrmode6oneL32 : Operand<i32>, +def addrmode6oneL32 : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -998,7 +1001,7 @@ def addrmode6oneL32 : Operand<i32>, } // Base class for addrmode6 with specific alignment restrictions. -class AddrMode6Align : Operand<i32>, +class AddrMode6Align : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm:$align); @@ -1074,7 +1077,7 @@ def addrmode6align64or128or256 : AddrMode6Align { // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. -def addrmode6dup : Operand<i32>, +def addrmode6dup : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -1085,7 +1088,7 @@ def addrmode6dup : Operand<i32>, } // Base class for addrmode6dup with specific alignment restrictions. 
-class AddrMode6DupAlign : Operand<i32>, +class AddrMode6DupAlign : MemOperand, ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{ let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); @@ -1149,7 +1152,7 @@ def addrmode6dupalign64or128 : AddrMode6DupAlign { // addrmodepc := pc + reg // -def addrmodepc : Operand<i32>, +def addrmodepc : MemOperand, ComplexPattern<i32, 2, "SelectAddrModePC", []> { let PrintMethod = "printAddrModePCOperand"; let MIOperandInfo = (ops GPR, i32imm); @@ -1158,7 +1161,7 @@ def addrmodepc : Operand<i32>, // addr_offset_none := reg // def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; } -def addr_offset_none : Operand<i32>, +def addr_offset_none : MemOperand, ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> { let PrintMethod = "printAddrMode7Operand"; let DecoderMethod = "DecodeAddrMode7Operand"; @@ -1417,7 +1420,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0, + string rrDecoderMethod = ""> { def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rn, $imm", [(opnode GPR:$Rn, mod_imm:$imm)]>, @@ -1445,6 +1449,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + let DecoderMethod = rrDecoderMethod; let Unpredictable{15-12} = 0b1111; } @@ -4263,6 +4268,30 @@ def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; //===----------------------------------------------------------------------===// +// ARMv8.1a Privileged Access Never extension +// +// SETPAN #imm1 + +def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", + "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010001; + let Inst{19-16} = 0b0000; + let Inst{15-10} = 0b000000; + let Inst{9} = imm; + let Inst{8} = 0b0; + let Inst{7-4} = 0b0000; + let Inst{3-0} = 0b0000; + + let Unpredictable{19-16} = 0b1111; + let Unpredictable{15-10} = 0b111111; + let Unpredictable{8} = 0b1; + let Unpredictable{3-0} = 0b1111; +} + +//===----------------------------------------------------------------------===// // Comparison Instructions... // @@ -4366,7 +4395,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), // Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1, + "DecodeTSTInstruction">; defm TEQ : AI1_cmp_irs<0b1001, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 3c62e0e..d0ade77 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -142,7 +142,7 @@ def t_blxtarget : Operand<i32> { // t_addrmode_pc := <label> => pc + imm8 * 4 // -def t_addrmode_pc : Operand<i32> { +def t_addrmode_pc : MemOperand { let EncoderMethod = "getAddrModePCOpValue"; let DecoderMethod = "DecodeThumbAddrModePC"; let PrintMethod = "printThumbLdrLabelOperand"; @@ -153,7 +153,7 @@ def t_addrmode_pc : Operand<i32> { // t_addrmode_rr := reg + reg // def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; } -def t_addrmode_rr : Operand<i32>, +def t_addrmode_rr : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; @@ -169,7 +169,7 @@ def t_addrmode_rr : Operand<i32>, // the reg+imm forms will match instead. This is a horrible way to do that, // as it forces tight coupling between the methods, but it's how selectiondag // currently works. -def t_addrmode_rrs1 : Operand<i32>, +def t_addrmode_rrs1 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let PrintMethod = "printThumbAddrModeRROperand"; @@ -177,7 +177,7 @@ def t_addrmode_rrs1 : Operand<i32>, let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -def t_addrmode_rrs2 : Operand<i32>, +def t_addrmode_rrs2 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let DecoderMethod = "DecodeThumbAddrModeRR"; @@ -185,7 +185,7 @@ def t_addrmode_rrs2 : Operand<i32>, let ParserMatchClass = t_addrmode_rr_asm_operand; let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); } -def t_addrmode_rrs4 : Operand<i32>, +def t_addrmode_rrs4 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> { let EncoderMethod = "getThumbAddrModeRegRegOpValue"; let DecoderMethod = "DecodeThumbAddrModeRR"; @@ -197,7 +197,7 @@ def t_addrmode_rrs4 : Operand<i32>, // t_addrmode_is4 := reg + imm5 * 4 // def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; } -def t_addrmode_is4 : Operand<i32>, +def t_addrmode_is4 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -209,7 +209,7 @@ def t_addrmode_is4 : Operand<i32>, // t_addrmode_is2 := reg + imm5 * 2 // def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; } -def t_addrmode_is2 : Operand<i32>, +def t_addrmode_is2 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> { let EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -221,7 +221,7 @@ def t_addrmode_is2 : Operand<i32>, // t_addrmode_is1 := reg + imm5 // def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; } -def t_addrmode_is1 : Operand<i32>, +def t_addrmode_is1 : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> { let 
EncoderMethod = "getAddrModeISOpValue"; let DecoderMethod = "DecodeThumbAddrModeIS"; @@ -235,7 +235,7 @@ def t_addrmode_is1 : Operand<i32>, // FIXME: This really shouldn't have an explicit SP operand at all. It should // be implicit, just like in the instruction encoding itself. def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; } -def t_addrmode_sp : Operand<i32>, +def t_addrmode_sp : MemOperand, ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { let EncoderMethod = "getAddrModeThumbSPOpValue"; let DecoderMethod = "DecodeThumbAddrModeSP"; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 10b0a0e..103ee00 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -148,7 +148,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{ // t2addrmode_imm12 := reg + imm12 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";} -def t2addrmode_imm12 : Operand<i32>, +def t2addrmode_imm12 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> { let PrintMethod = "printAddrModeImm12Operand<false>"; let EncoderMethod = "getAddrModeImm12OpValue"; @@ -178,7 +178,7 @@ def t2adrlabel : Operand<i32> { // t2addrmode_posimm8 := reg + imm8 def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";} -def t2addrmode_posimm8 : Operand<i32> { +def t2addrmode_posimm8 : MemOperand { let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; @@ -188,7 +188,7 @@ def t2addrmode_posimm8 : Operand<i32> { // t2addrmode_negimm8 := reg - imm8 def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";} -def t2addrmode_negimm8 : Operand<i32>, +def t2addrmode_negimm8 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand<false>"; let EncoderMethod = "getT2AddrModeImm8OpValue"; @@ -199,7 +199,7 @@ def t2addrmode_negimm8 : Operand<i32>, // t2addrmode_imm8 := reg +/- imm8 def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; } -class T2AddrMode_Imm8 : Operand<i32>, +class T2AddrMode_Imm8 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let EncoderMethod = "getT2AddrModeImm8OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8"; @@ -215,7 +215,7 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 { let PrintMethod = "printT2AddrModeImm8Operand<true>"; } -def t2am_imm8_offset : Operand<i32>, +def t2am_imm8_offset : MemOperand, ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", [], [SDNPWantRoot]> { let PrintMethod = "printT2AddrModeImm8OffsetOperand"; @@ -225,7 +225,7 @@ def t2am_imm8_offset : Operand<i32>, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -class T2AddrMode_Imm8s4 : Operand<i32> { +class T2AddrMode_Imm8s4 : MemOperand { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; @@ -241,7 +241,7 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 { } def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; } -def t2am_imm8s4_offset : Operand<i32> { +def t2am_imm8s4_offset : MemOperand { let PrintMethod = "printT2AddrModeImm8s4OffsetOperand"; let EncoderMethod = "getT2Imm8s4OpValue"; let DecoderMethod = "DecodeT2Imm8S4"; @@ -251,7 +251,7 @@ def t2am_imm8s4_offset : Operand<i32> { def MemImm0_1020s4OffsetAsmOperand : 
AsmOperandClass { let Name = "MemImm0_1020s4Offset"; } -def t2addrmode_imm0_1020s4 : Operand<i32>, +def t2addrmode_imm0_1020s4 : MemOperand, ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> { let PrintMethod = "printT2AddrModeImm0_1020s4Operand"; let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue"; @@ -262,7 +262,7 @@ def t2addrmode_imm0_1020s4 : Operand<i32>, // t2addrmode_so_reg := reg + (reg << imm2) def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";} -def t2addrmode_so_reg : Operand<i32>, +def t2addrmode_so_reg : MemOperand, ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> { let PrintMethod = "printT2AddrModeSoRegOperand"; let EncoderMethod = "getT2AddrModeSORegOpValue"; @@ -273,13 +273,13 @@ def t2addrmode_so_reg : Operand<i32>, // Addresses for the TBB/TBH instructions. def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; } -def addrmode_tbb : Operand<i32> { +def addrmode_tbb : MemOperand { let PrintMethod = "printAddrModeTBB"; let ParserMatchClass = addrmode_tbb_asmoperand; let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); } def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; } -def addrmode_tbh : Operand<i32> { +def addrmode_tbh : MemOperand { let PrintMethod = "printAddrModeTBH"; let ParserMatchClass = addrmode_tbh_asmoperand; let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm); @@ -3630,8 +3630,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} -def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, - Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> { +def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>, + Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -4281,6 +4281,23 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, //===----------------------------------------------------------------------===// +// ARMv8.1 Privileged Access Never extension +// +// SETPAN #imm1 + +def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>, + T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{4} = 0b1; + let Inst{3} = imm; + let Inst{2-0} = 0b000; + + let Unpredictable{4} = 0b1; + let Unpredictable{2-0} = 0b111; +} + +//===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index fbec9e6..2a3e1da 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -133,6 +133,7 @@ void ARMSubtarget::initializeEnvironment() { HasV6T2Ops = false; HasV7Ops = false; HasV8Ops = false; + HasV8_1aOps = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; @@ -166,7 +167,6 @@ void ARMSubtarget::initializeEnvironment() { HasTrustZone = false; HasCrypto = false; HasCRC = false; - HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; @@ -191,7 +191,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -254,7 +254,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops()
? true : false; + RestrictIT = hasV8Ops(); break; case RestrictedIT: RestrictIT = true; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f36cd5c..d82314d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -43,7 +43,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait, + CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait, }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -67,6 +67,7 @@ protected: bool HasV6T2Ops; bool HasV7Ops; bool HasV8Ops; + bool HasV8_1aOps; /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. @@ -182,9 +183,6 @@ protected: /// HasCRC - if true, processor supports CRC instructions bool HasCRC; - /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions - bool HasV8_1a; - /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; @@ -295,6 +293,7 @@ public: bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } + bool hasV8_1aOps() const { return HasV8_1aOps; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } @@ -316,7 +315,6 @@ public: bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 1bee1b0..ae33340 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -42,6 +42,11 @@ EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden, cl::desc("Enable ARM load/store optimization pass"), cl::init(true)); +// FIXME: Unify control over GlobalMerge. +static cl::opt<cl::boolOrDefault> +EnableGlobalMerge("arm-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass")); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); @@ -332,7 +337,9 @@ void ARMPassConfig::addIRPasses() { } bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() == CodeGenOpt::Aggressive) + if ((TM->getOptLevel() == CodeGenOpt::Aggressive && + EnableGlobalMerge == cl::BOU_UNSET) || + EnableGlobalMerge == cl::BOU_TRUE) // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. 
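The EnableGlobalMerge option introduced in the ARMTargetMachine.cpp hunk above is declared as cl::opt<cl::boolOrDefault> rather than a plain bool so that "not given on the command line" stays distinguishable from an explicit choice: unset defers to the optimization-level heuristic, while an explicit true or false overrides it in either direction. The same three-state logic in isolation (a standalone sketch, not the actual pass-config code):

enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; // mirrors llvm::cl

// The heuristic enables the pass only at aggressive optimization levels;
// an explicit flag value wins over the heuristic in both directions.
static bool shouldRunGlobalMerge(bool AggressiveOpt, BoolOrDefault Flag) {
  if (Flag == BOU_TRUE)
    return true;
  if (Flag == BOU_FALSE)
    return false;
  return AggressiveOpt; // BOU_UNSET: fall back to the heuristic
}

This is equivalent to the condition in the hunk, (Aggressive && unset) || explicitly-true, with the explicit-false case falling through to "don't run".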
We may want // to look into using the old value for non-thumb1 code of diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2215efb..b9ad2c8 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,8 +276,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } - bool hasV8_1a() const { - return STI.getFeatureBits() & ARM::FeatureV8_1a; + bool hasV8_1aOps() const { + return STI.getFeatureBits() & ARM::HasV8_1aOps; } void SwitchMode() { @@ -5418,47 +5418,44 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, /// inclusion of carry set or predication code operands. // // FIXME: It would be nice to autogen this. -void ARMAsmParser:: -getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, - bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { - if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || +void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, + bool &CanAcceptPredicationCode) { + CanAcceptCarrySet = + Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || - Mnemonic == "add" || Mnemonic == "adc" || - Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" || - Mnemonic == "orr" || Mnemonic == "mvn" || - Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || - Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || - Mnemonic == "vfm" || Mnemonic == "vfnm" || - (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || - Mnemonic == "mla" || Mnemonic == "smlal" || - Mnemonic == "umlal" || Mnemonic == "umull"))) { - CanAcceptCarrySet = true; - } else - CanAcceptCarrySet = false; + Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" || + Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" || + Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || + Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || + Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" || + (!isThumb() && + (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || + Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull")); if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || - Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || + Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" || Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || - Mnemonic.startswith("vsel") || - Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || - Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || - Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || + Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || + Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || + Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || + Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || + Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } 
else if (!isThumb()) { // Some instructions are only predicable in Thumb mode - CanAcceptPredicationCode - = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && + CanAcceptPredicationCode = + Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && - Mnemonic != "ldc2" && Mnemonic != "ldc2l" && - Mnemonic != "stc2" && Mnemonic != "stc2l" && - !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); + Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" && + Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") && + !Mnemonic.startswith("srs"); } else if (isThumbOne()) { if (hasV6MOps()) CanAcceptPredicationCode = Mnemonic != "movs"; @@ -6153,6 +6150,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "destination operands can't be identical"); return false; } + case ARM::t2BXJ: { + const unsigned RmReg = Inst.getOperand(0).getReg(); + // Rm = SP is no longer unpredictable in v8-A + if (RmReg == ARM::SP && !hasV8Ops()) + return Error(Operands[2]->getStartLoc(), + "r13 (SP) is an unpredictable operand to BXJ"); + return false; + } case ARM::STRD: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4d5122a..4c169a8 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -91,7 +91,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() {} + ~ARMDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -106,7 +106,7 @@ public: MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() {} + ~ThumbDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -212,6 +212,10 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, @@ -2119,6 +2123,54 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Pred = fieldFromInstruction(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + + if (Pred == 0xF) + return DecodeSETPANInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; 
+} + +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Imm = fieldFromInstruction(Insn, 9, 1); + + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + uint64_t FeatureBits = Dis->getSubtargetInfo().getFeatureBits(); + if ((FeatureBits & ARM::HasV8_1aOps) == 0 || + (FeatureBits & ARM::HasV8Ops) == 0) + return MCDisassembler::Fail; + + // Decoder can be called from DecodeTST, which does not check that the full + // encoding is valid. + if (fieldFromInstruction(Insn, 20, 12) != 0xf11 || + fieldFromInstruction(Insn, 4, 4) != 0) + return MCDisassembler::Fail; + if (fieldFromInstruction(Insn, 10, 10) != 0 || + fieldFromInstruction(Insn, 0, 4) != 0) + S = MCDisassembler::SoftFail; + + Inst.setOpcode(ARM::SETPAN); + Inst.addOperand(MCOperand::CreateImm(Imm)); + + return S; +} + static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index e15323d..c2e1b2a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -40,12 +40,12 @@ static unsigned translateShiftImm(unsigned imm) { /// Prints the shift value with an immediate value. static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, - unsigned ShImm, bool UseMarkup) { + unsigned ShImm, bool UseMarkup) { if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm)) return; O << ", "; - assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); + assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0"); O << getShiftOpcStr(ShOpc); if (ShOpc != ARM_AM::rrx) { @@ -58,49 +58,52 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, } } -ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} +ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << markup("<reg:") - << getRegisterName(RegNo) - << markup(">"); + OS << markup("<reg:") << getRegisterName(RegNo) << markup(">"); } void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); - switch(Opcode) { + switch (Opcode) { // Check for HINT instructions w/ canonical names. case ARM::HINT: case ARM::tHINT: case ARM::t2HINT: switch (MI->getOperand(0).getImm()) { - case 0: O << "\tnop"; break; - case 1: O << "\tyield"; break; - case 2: O << "\twfe"; break; - case 3: O << "\twfi"; break; - case 4: O << "\tsev"; break; + case 0: + O << "\tnop"; + break; + case 1: + O << "\tyield"; + break; + case 2: + O << "\twfe"; + break; + case 3: + O << "\twfi"; + break; + case 4: + O << "\tsev"; + break; + case 5: - if ((getAvailableFeatures() & ARM::HasV8Ops)) { + if ((STI.getFeatureBits() & ARM::HasV8Ops)) { O << "\tsevl"; break; } // Fallthrough for non-v8 default: // Anything else should just print normally.
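The pair of decoders above relies on one encoding trick: SETPAN occupies what would otherwise be a TST encoding whose condition field is 0b1111, a value ARM reserves for special unconditional encodings, so DecodeTSTInstruction peels that case off and re-dispatches to DecodeSETPANInstruction. Everything rests on fieldFromInstruction-style bit extraction, which is simple enough to sketch standalone (simplified helper, not LLVM's templated version):

#include <cassert>
#include <cstdint>

// Extract NumBits bits of Insn starting at StartBit (bit 0 = LSB).
static uint32_t field(uint32_t Insn, unsigned StartBit, unsigned NumBits) {
  assert(StartBit + NumBits <= 32 && "field out of range");
  uint32_t Mask = (NumBits == 32) ? ~0u : ((1u << NumBits) - 1);
  return (Insn >> StartBit) & Mask;
}

// True when a word in the TST encoding space is really a SETPAN, combining
// the predicate test from DecodeTST with the fixed-bits test from
// DecodeSETPAN above (the two overlap deliberately, as they do there).
static bool looksLikeSetPan(uint32_t Insn) {
  return field(Insn, 28, 4) == 0xF && field(Insn, 20, 12) == 0xF11;
}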
- printInstruction(MI, O); + printInstruction(MI, STI, O); printAnnotation(O, Annot); return; } - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); if (Opcode == ARM::t2HINT) O << ".w"; printAnnotation(O, Annot); @@ -115,8 +118,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, const MCOperand &MO3 = MI->getOperand(3); O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); - printSBitModifierOperand(MI, 6, O); - printPredicateOperand(MI, 4, O); + printSBitModifierOperand(MI, 6, STI, O); + printPredicateOperand(MI, 4, STI, O); O << '\t'; printRegName(O, Dst.getReg()); @@ -137,8 +140,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, const MCOperand &MO2 = MI->getOperand(2); O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())); - printSBitModifierOperand(MI, 5, O); - printPredicateOperand(MI, 3, O); + printSBitModifierOperand(MI, 5, STI, O); + printPredicateOperand(MI, 3, STI, O); O << '\t'; printRegName(O, Dst.getReg()); @@ -150,10 +153,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - O << ", " - << markup("<imm:") - << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) - << markup(">"); + O << ", " << markup("<imm:") << "#" + << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">"); printAnnotation(O, Annot); return; } @@ -164,11 +165,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { // Should only print PUSH if there are at least two registers in the list. O << '\t' << "push"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); if (Opcode == ARM::t2STMDB_UPD) O << ".w"; O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -178,7 +179,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(2).getReg() == ARM::SP && MI->getOperand(3).getImm() == -4) { O << '\t' << "push"; - printPredicateOperand(MI, 4, O); + printPredicateOperand(MI, 4, STI, O); O << "\t{"; printRegName(O, MI->getOperand(1).getReg()); O << "}"; @@ -193,11 +194,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) { // Should only print POP if there are at least two registers in the list. 
O << '\t' << "pop"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); if (Opcode == ARM::t2LDMIA_UPD) O << ".w"; O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -207,7 +208,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (MI->getOperand(2).getReg() == ARM::SP && MI->getOperand(4).getImm() == 4) { O << '\t' << "pop"; - printPredicateOperand(MI, 5, O); + printPredicateOperand(MI, 5, STI, O); O << "\t{"; printRegName(O, MI->getOperand(0).getReg()); O << "}"; @@ -221,9 +222,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case ARM::VSTMDDB_UPD: if (MI->getOperand(0).getReg() == ARM::SP) { O << '\t' << "vpush"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -234,9 +235,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, case ARM::VLDMDIA_UPD: if (MI->getOperand(0).getReg() == ARM::SP) { O << '\t' << "vpop"; - printPredicateOperand(MI, 2, O); + printPredicateOperand(MI, 2, STI, O); O << '\t'; - printRegisterList(MI, 4, O); + printRegisterList(MI, 4, STI, O); printAnnotation(O, Annot); return; } else @@ -252,12 +253,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, O << "\tldm"; - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); O << '\t'; printRegName(O, BaseReg); - if (Writeback) O << "!"; + if (Writeback) + O << "!"; O << ", "; - printRegisterList(MI, 3, O); + printRegisterList(MI, 3, STI, O); printAnnotation(O, Annot); return; } @@ -268,9 +270,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // GPRs. However, when decoding them, the two GRPs cannot be automatically // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. - case ARM::LDREXD: case ARM::STREXD: - case ARM::LDAEXD: case ARM::STLEXD: { - const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); + case ARM::LDREXD: + case ARM::STREXD: + case ARM::LDAEXD: + case ARM::STLEXD: { + const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID); bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); if (MRC.contains(Reg)) { @@ -280,28 +284,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, if (isStore) NewMI.addOperand(MI->getOperand(0)); - NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0, - &MRI.getRegClass(ARM::GPRPairRegClassID))); + NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg( + Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID))); NewMI.addOperand(NewReg); // Copy the rest operands into NewMI. - for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i) + for (unsigned i = isStore ? 
3 : 2; i < MI->getNumOperands(); ++i) NewMI.addOperand(MI->getOperand(i)); - printInstruction(&NewMI, O); + printInstruction(&NewMI, STI, O); return; } break; } - // B9.3.3 ERET (Thumb) - // For a target that has Virtualization Extensions, ERET is the preferred - // disassembly of SUBS PC, LR, #0 + // B9.3.3 ERET (Thumb) + // For a target that has Virtualization Extensions, ERET is the preferred + // disassembly of SUBS PC, LR, #0 case ARM::t2SUBS_PC_LR: { - if (MI->getNumOperands() == 3 && - MI->getOperand(0).isImm() && + if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0 && - (getAvailableFeatures() & ARM::FeatureVirtualization)) { + (STI.getFeatureBits() & ARM::FeatureVirtualization)) { O << "\teret"; - printPredicateOperand(MI, 1, O); + printPredicateOperand(MI, 1, STI, O); printAnnotation(O, Annot); return; } @@ -309,20 +312,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } - printInstruction(MI, O); + printInstruction(MI, STI, O); printAnnotation(O, Annot); } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { unsigned Reg = Op.getReg(); printRegName(O, Reg); } else if (Op.isImm()) { - O << markup("<imm:") - << '#' << formatImm(Op.getImm()) - << markup(">"); + O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); const MCExpr *Expr = Op.getExpr(); @@ -354,6 +355,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); if (MO1.isExpr()) { @@ -370,13 +372,9 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << markup("<imm:") - << "#-" << formatImm(-OffImm) - << markup(">"); + O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">"); } else { - O << markup("<imm:") - << "#" << formatImm(OffImm) - << markup(">"); + O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); } @@ -387,10 +385,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, // REG REG 0,SH_OPC - e.g. R5, ROR R3 // REG 0 IMM,SH_OPC - e.g. 
R5, LSL #3 void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); + const MCOperand &MO3 = MI->getOperand(OpNum + 2); printRegName(O, MO1.getReg()); @@ -406,9 +405,10 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); printRegName(O, MO1.getReg()); @@ -417,28 +417,25 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum, ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup); } - //===--------------------------------------------------------------------===// // Addressing Mode #2 //===--------------------------------------------------------------------===// void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO2 = MI->getOperand(Op + 1); + const MCOperand &MO3 = MI->getOperand(Op + 2); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (!MO2.getReg()) { if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0. - O << ", " - << markup("<imm:") - << "#" + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) - << ARM_AM::getAM2Offset(MO3.getImm()) - << markup(">"); + << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">"); } O << "]" << markup(">"); return; @@ -454,9 +451,10 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO2 = MI->getOperand(Op + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; @@ -465,9 +463,10 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO2 = MI->getOperand(Op + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); O << ", "; @@ -476,35 +475,35 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op, } void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
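One formatting detail recurs through all of these printer hunks: immediates and memory operands are bracketed by markup("<imm:") ... markup(">") calls. The markup helper emits its tag text only when markup output has been requested, so tools that want structured operands can opt in while ordinary assembly output is byte-for-byte unchanged. A toy version of the mechanism (a sketch of the idea, not the LLVM class):

#include <cstdio>

struct MiniPrinter {
  bool UseMarkup = false; // assumption: toggled by the client

  const char *markup(const char *Tag) const { return UseMarkup ? Tag : ""; }

  void printImm(int Imm) const {
    std::printf("%s#%d%s\n", markup("<imm:"), Imm, markup(">"));
  }
};

int main() {
  MiniPrinter P;
  P.printImm(42); // prints "#42"
  P.UseMarkup = true;
  P.printImm(42); // prints "<imm:#42>"
}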
+ printOperand(MI, Op, STI, O); return; } #ifndef NDEBUG - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO3 = MI->getOperand(Op + 2); unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); - assert(IdxMode != ARMII::IndexModePost && - "Should be pre or offset index op"); + assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op"); #endif - printAM2PreOrOffsetIndexOp(MI, Op, O); + printAM2PreOrOffsetIndexOp(MI, Op, STI, O); } void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - O << markup("<imm:") - << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) - << ImmOffs + O << markup("<imm:") << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs << markup(">"); return; } @@ -524,8 +523,8 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0) { const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); + const MCOperand &MO2 = MI->getOperand(Op + 1); + const MCOperand &MO3 = MI->getOperand(Op + 2); O << markup("<mem:") << '['; printRegName(O, MO1.getReg()); @@ -537,16 +536,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, return; } - //If the op is sub we have to print the immediate even if it is 0 + // If the op is sub we have to print the immediate even if it is 0 unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) { - O << ", " - << markup("<imm:") - << "#" - << ARM_AM::getAddrOpcStr(op) - << ImmOffs + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs << markup(">"); } O << ']' << markup(">"); @@ -554,10 +549,11 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); - if (!MO1.isReg()) { // For label symbolic references. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, Op, STI, O); return; } @@ -569,9 +565,10 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); if (MO1.getReg()) { O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())); @@ -580,56 +577,56 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - O << markup("<imm:") - << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs + O << markup("<imm:") << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs << markup(">"); } -void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, - unsigned OpNum, +void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << markup("<imm:") - << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) + O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff) << markup(">"); } void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << (MO2.getImm() ? "" : "-"); printRegName(O, MO1.getReg()); } -void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); unsigned Imm = MO.getImm(); - O << markup("<imm:") - << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) + O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2) << markup(">"); } - void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { - ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum) - .getImm()); + ARM_AM::AMSubMode Mode = + ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm()); O << ARM_AM::getAMSubModeStr(Mode); } template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, OpNum, STI, O); return; } @@ -639,20 +636,17 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm()); ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm()); if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { - O << ", " - << markup("<imm:") - << "#" - << ARM_AM::getAddrOpcStr(Op) - << ImmOffs * 4 - << markup(">"); + O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op) + << ImmOffs * 4 << markup(">"); } O << "]" << markup(">"); } void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -663,6 +657,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); O << markup("<mem:") << "["; @@ -672,6 +667,7 @@ void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.getReg() == 0) @@ -684,49 +680,47 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); uint32_t v = ~MO.getImm(); int32_t lsb = countTrailingZeros(v); - int32_t width = (32 - countLeadingZeros (v)) - lsb; + int32_t width = (32 - countLeadingZeros(v)) - lsb; assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); - O << markup("<imm:") << '#' << lsb << markup(">") - << ", " - << markup("<imm:") << '#' << width << markup(">"); + O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:") + << '#' << width << markup(">"); } void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); - O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops)); + O << ARM_MB::MemBOptToString(val, (STI.getFeatureBits() & ARM::HasV8Ops)); } void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned val = MI->getOperand(OpNum).getImm(); O << ARM_ISB::InstSyncBOptToString(val); } void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned ShiftOp = MI->getOperand(OpNum).getImm(); bool isASR = (ShiftOp & (1 << 5)) != 0; unsigned Amt = ShiftOp & 0x1f; if (isASR) { - O << ", asr " - << markup("<imm:") - << "#" << (Amt == 0 ? 32 : Amt) - << markup(">"); - } - else if (Amt) { - O << ", lsl " - << markup("<imm:") - << "#" << Amt + O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 
32 : Amt) << markup(">"); + } else if (Amt) { + O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">"); } } void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) @@ -736,6 +730,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); // A shift amount of 32 is encoded as 0. @@ -746,16 +741,19 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum) O << ", "; + if (i != OpNum) + O << ", "; printRegName(O, MI->getOperand(i).getReg()); } O << "}"; } void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0)); @@ -763,8 +761,8 @@ void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1)); } - void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); if (Op.getImm()) @@ -774,16 +772,16 @@ void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); O << ARM_PROC::IModToString(Op.getImm()); } void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); unsigned IFlags = Op.getImm(); - for (int i=2; i >= 0; --i) + for (int i = 2; i >= 0; --i) if (IFlags & (1 << i)) O << ARM_PROC::IFlagsToString(1 << i); @@ -792,11 +790,12 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; - uint64_t FeatureBits = getAvailableFeatures(); + uint64_t FeatureBits = STI.getFeatureBits(); if (FeatureBits & ARM::FeatureMClass) { unsigned SYSm = Op.getImm(); @@ -805,14 +804,30 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, // For writes, handle extended mask bits if the DSP extension is present. 
if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) { switch (SYSm) { - case 0x400: O << "apsr_g"; return; - case 0xc00: O << "apsr_nzcvqg"; return; - case 0x401: O << "iapsr_g"; return; - case 0xc01: O << "iapsr_nzcvqg"; return; - case 0x402: O << "eapsr_g"; return; - case 0xc02: O << "eapsr_nzcvqg"; return; - case 0x403: O << "xpsr_g"; return; - case 0xc03: O << "xpsr_nzcvqg"; return; + case 0x400: + O << "apsr_g"; + return; + case 0xc00: + O << "apsr_nzcvqg"; + return; + case 0x401: + O << "iapsr_g"; + return; + case 0xc01: + O << "iapsr_nzcvqg"; + return; + case 0x402: + O << "eapsr_g"; + return; + case 0xc02: + O << "eapsr_nzcvqg"; + return; + case 0x403: + O << "xpsr_g"; + return; + case 0xc03: + O << "xpsr_nzcvqg"; + return; } } @@ -823,29 +838,66 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an // alias for MSR APSR_nzcvq. switch (SYSm) { - case 0: O << "apsr_nzcvq"; return; - case 1: O << "iapsr_nzcvq"; return; - case 2: O << "eapsr_nzcvq"; return; - case 3: O << "xpsr_nzcvq"; return; + case 0: + O << "apsr_nzcvq"; + return; + case 1: + O << "iapsr_nzcvq"; + return; + case 2: + O << "eapsr_nzcvq"; + return; + case 3: + O << "xpsr_nzcvq"; + return; } } switch (SYSm) { - default: llvm_unreachable("Unexpected mask value!"); - case 0: O << "apsr"; return; - case 1: O << "iapsr"; return; - case 2: O << "eapsr"; return; - case 3: O << "xpsr"; return; - case 5: O << "ipsr"; return; - case 6: O << "epsr"; return; - case 7: O << "iepsr"; return; - case 8: O << "msp"; return; - case 9: O << "psp"; return; - case 16: O << "primask"; return; - case 17: O << "basepri"; return; - case 18: O << "basepri_max"; return; - case 19: O << "faultmask"; return; - case 20: O << "control"; return; + default: + llvm_unreachable("Unexpected mask value!"); + case 0: + O << "apsr"; + return; + case 1: + O << "iapsr"; + return; + case 2: + O << "eapsr"; + return; + case 3: + O << "xpsr"; + return; + case 5: + O << "ipsr"; + return; + case 6: + O << "epsr"; + return; + case 7: + O << "iepsr"; + return; + case 8: + O << "msp"; + return; + case 9: + O << "psp"; + return; + case 16: + O << "primask"; + return; + case 17: + O << "basepri"; + return; + case 18: + O << "basepri_max"; + return; + case 19: + O << "faultmask"; + return; + case 20: + O << "control"; + return; } } @@ -854,10 +906,17 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { - default: llvm_unreachable("Unexpected mask value!"); - case 4: O << "g"; return; - case 8: O << "nzcvq"; return; - case 12: O << "nzcvqg"; return; + default: + llvm_unreachable("Unexpected mask value!"); + case 4: + O << "g"; + return; + case 8: + O << "nzcvq"; + return; + case 12: + O << "nzcvqg"; + return; } } @@ -868,14 +927,19 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (Mask) { O << '_'; - if (Mask & 8) O << 'f'; - if (Mask & 4) O << 's'; - if (Mask & 2) O << 'x'; - if (Mask & 1) O << 'c'; + if (Mask & 8) + O << 'f'; + if (Mask & 4) + O << 's'; + if (Mask & 2) + O << 'x'; + if (Mask & 1) + O << 'c'; } } void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { uint32_t Banked = MI->getOperand(OpNum).getImm(); uint32_t R = (Banked & 0x20) >> 5; @@ -886,25 +950,40 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned 
OpNum, if (R) { O << "SPSR_"; - switch(SysM) { - case 0x0e: O << "fiq"; return; - case 0x10: O << "irq"; return; - case 0x12: O << "svc"; return; - case 0x14: O << "abt"; return; - case 0x16: O << "und"; return; - case 0x1c: O << "mon"; return; - case 0x1e: O << "hyp"; return; - default: llvm_unreachable("Invalid banked SPSR register"); + switch (SysM) { + case 0x0e: + O << "fiq"; + return; + case 0x10: + O << "irq"; + return; + case 0x12: + O << "svc"; + return; + case 0x14: + O << "abt"; + return; + case 0x16: + O << "und"; + return; + case 0x1c: + O << "mon"; + return; + case 0x1e: + O << "hyp"; + return; + default: + llvm_unreachable("Invalid banked SPSR register"); } } assert(!R && "should have dealt with SPSR regs"); const char *RegNames[] = { - "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "", - "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "", - "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und", - "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp" - }; + "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", + "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", + "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", + "sp_abt", "lr_und", "sp_und", "", "", "", "", + "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"}; const char *Name = RegNames[SysM]; assert(Name[0] && "invalid banked register operand"); @@ -912,6 +991,7 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); // Handle the undefined 15 CC value here for printing so we don't abort(). 
@@ -923,12 +1003,14 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); O << ARMCondCodeToString(CC); } void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { if (MI->getOperand(OpNum).getReg()) { assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && @@ -938,33 +1020,38 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "p" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "c" << MI->getOperand(OpNum).getImm(); } void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{" << MI->getOperand(OpNum).getImm() << "}"; } void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, raw_ostream &O) { llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } -template<unsigned scale> +template <unsigned scale> void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); if (MO.isExpr()) { @@ -985,25 +1072,26 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { - O << markup("<imm:") - << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) + O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) << markup(">"); } void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << markup("<imm:") - << "#" << formatImm((Imm == 0 ? 32 : Imm)) + O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm)) << markup(">"); } void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(OpNum).getImm(); - unsigned Firstcond = MI->getOperand(OpNum-1).getImm(); + unsigned Firstcond = MI->getOperand(OpNum - 1).getImm(); unsigned CondBit0 = Firstcond & 1; unsigned NumTZ = countTrailingZeros(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); @@ -1017,12 +1105,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op, STI, O); return; } @@ -1037,22 +1126,21 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op, void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O, unsigned Scale) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, Op, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, STI, O); return; } O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) { - O << ", " - << markup("<imm:") - << "#" << formatImm(ImmOffs * Scale) + O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale) << markup(">"); } O << "]" << markup(">"); @@ -1060,25 +1148,29 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 1); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1); } void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 2); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2); } void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 4); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); } void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, + const MCSubtargetInfo &STI, raw_ostream &O) { - printThumbAddrModeImm5SOperand(MI, Op, O, 4); + printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4); } // Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 @@ -1086,9 +1178,10 @@ void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op, // REG 0 0 - e.g. R5 // REG IMM, SH_OPC - e.g. R5, LSL #3 void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); unsigned Reg = MO1.getReg(); printRegName(O, Reg); @@ -1101,12 +1194,13 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum, template <bool AlwaysPrintImm0> void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, OpNum, STI, O); return; } @@ -1119,26 +1213,20 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << formatImm(-OffImm) - << markup(">"); - } - else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << formatImm(OffImm) - << markup(">"); + O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">"); + } else if (AlwaysPrintImm0 || OffImm > 0) { + O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); } -template<bool AlwaysPrintImm0> +template <bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -1149,28 +1237,23 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << -OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#" << OffImm << markup(">"); } O << "]" << markup(">"); } -template<bool AlwaysPrintImm0> +template <bool AlwaysPrintImm0> void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); - if (!MO1.isReg()) { // For label symbolic references. - printOperand(MI, OpNum, O); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, OpNum, STI, O); return; } @@ -1186,39 +1269,31 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, if (OffImm == INT32_MIN) OffImm = 0; if (isSub) { - O << ", " - << markup("<imm:") - << "#-" << -OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { - O << ", " - << markup("<imm:") - << "#" << OffImm - << markup(">"); + O << ", " << markup("<imm:") << "#" << OffImm << markup(">"); } O << "]" << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { - O << ", " - << markup("<imm:") - << "#" << formatImm(MO2.getImm() * 4) + O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4) << markup(">"); } O << "]" << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); O << ", " << markup("<imm:"); @@ -1231,9 +1306,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, O << markup(">"); } -void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); int32_t OffImm = (int32_t)MO1.getImm(); @@ -1251,10 +1326,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); + const MCOperand &MO2 = MI->getOperand(OpNum + 1); + const MCOperand &MO3 = MI->getOperand(OpNum + 2); O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); @@ -1266,71 +1342,74 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, unsigned ShAmt = MO3.getImm(); if (ShAmt) { assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl " - << markup("<imm:") - << "#" << ShAmt - << markup(">"); + O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">"); } O << "]" << markup(">"); } void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - O << markup("<imm:") - << '#' << ARM_AM::getFPImmFloat(MO.getImm()) + O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm()) << markup(">"); } void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << markup("<imm:") - << "#0x"; + O << 
markup("<imm:") << "#0x"; O.write_hex(Val); O << markup(">"); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); - O << markup("<imm:") - << "#" << formatImm(Imm + 1) - << markup(">"); + O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">"); } void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); if (Imm == 0) return; - O << ", ror " - << markup("<imm:") - << "#"; + O << ", ror " << markup("<imm:") << "#"; switch (Imm) { - default: assert (0 && "illegal ror immediate!"); - case 1: O << "8"; break; - case 2: O << "16"; break; - case 3: O << "24"; break; + default: + assert(0 && "illegal ror immediate!"); + case 1: + O << "8"; + break; + case 2: + O << "16"; + break; + case 3: + O << "24"; + break; } O << markup(">"); } void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { MCOperand Op = MI->getOperand(OpNum); // Support for fixups (MCFixup) if (Op.isExpr()) - return printOperand(MI, OpNum, O); + return printOperand(MI, OpNum, STI, O); unsigned Bits = Op.getImm() & 0xFF; unsigned Rot = (Op.getImm() & 0xF00) >> 7; - bool PrintUnsigned = false; - switch (MI->getOpcode()){ + bool PrintUnsigned = false; + switch (MI->getOpcode()) { case ARM::MOVi: // Movs to PC should be treated unsigned PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC); @@ -1354,36 +1433,30 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, } // Explicit #bits, #rot implied - O << "#" - << markup("<imm:") - << Bits - << markup(">") - << ", #" - << markup("<imm:") - << Rot - << markup(">"); + O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:") + << Rot << markup(">"); } void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << markup("<imm:") - << "#" << 16 - MI->getOperand(OpNum).getImm() + const MCSubtargetInfo &STI, raw_ostream &O) { + O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm() << markup(">"); } void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << markup("<imm:") - << "#" << 32 - MI->getOperand(OpNum).getImm() + const MCSubtargetInfo &STI, raw_ostream &O) { + O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm() << markup(">"); } void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); @@ -1391,7 +1464,8 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + const MCSubtargetInfo &STI, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); @@ -1402,8 +1476,8 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, O << "}"; } -void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, - unsigned OpNum, +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + const 
MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1416,6 +1490,7 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, } void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1430,6 +1505,7 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1447,6 +1523,7 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); @@ -1455,6 +1532,7 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1468,6 +1546,7 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1482,8 +1561,9 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, } void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. @@ -1498,9 +1578,9 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, O << "[]}"; } -void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListTwoSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); @@ -1511,24 +1591,24 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, O << "[]}"; } -void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListThreeSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. 
O << "{"; printRegName(O, MI->getOperand(OpNum).getReg()); - O << "[], "; + O << "[], "; printRegName(O, MI->getOperand(OpNum).getReg() + 2); O << "[], "; printRegName(O, MI->getOperand(OpNum).getReg() + 4); O << "[]}"; } -void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListFourSpacedAllLanes( + const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. @@ -1545,6 +1625,7 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the @@ -1558,9 +1639,9 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, O << "}"; } -void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { // Normally, it's not safe to use register enum values directly with // addition to get the next register, but for VFP registers, the // sort order is guaranteed because they're all of the form D<n>. diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index f179e01..3927c9f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -24,146 +24,207 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printRegName(raw_ostream &OS, unsigned RegNo) const override; // Autogenerated by tblgen. 
- void printInstruction(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - - void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); + + void printSORegRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printSORegImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printAddrModeTBB(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrModeTBH(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); template <bool AlwaysPrintImm0> - void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); - void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <bool AlwaysPrintImm0> - void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const 
MCSubtargetInfo &STI, raw_ostream &O); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); - void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMemBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printInstSyncBOption(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printShiftImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); template <unsigned scale> - void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbSRImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printThumbITMask(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O, unsigned Scale); void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); - void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - template<bool AlwaysPrintImm0> + void printT2SOOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - template<bool AlwaysPrintImm0> + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - template<bool AlwaysPrintImm0> + const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, + raw_ostream &O); void 
printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + + void printSetendOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCPSIMod(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCPSIFlag(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printBankedRegOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printRegisterList(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNoHashImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printPImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCImmediate(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printRotImmOperand(const MCInst *MI, unsigned 
OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printModImmOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printGPRPairOperand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + + void printPCLabel(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printFBits16(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFBits32(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorIndex(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListOne(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListTwo(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListThree(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printVectorListFour(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 0b2e3b0..590d72f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -153,18 +153,20 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { } } // end anonymous namespace -static unsigned getRelaxedOpcode(unsigned Op) { +unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const { + 
bool HasThumb2 = STI->getFeatureBits() & ARM::FeatureThumb2; + switch (Op) { default: return Op; case ARM::tBcc: - return ARM::t2Bcc; + return HasThumb2 ? (unsigned)ARM::t2Bcc : Op; case ARM::tLDRpci: - return ARM::t2LDRpci; + return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op; case ARM::tADR: - return ARM::t2ADR; + return HasThumb2 ? (unsigned)ARM::t2ADR : Op; case ARM::tB: - return ARM::t2B; + return HasThumb2 ? (unsigned)ARM::t2B : Op; case ARM::tCBZ: return ARM::tHINT; case ARM::tCBNZ: @@ -589,7 +591,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { if (A) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); if (Asm.isThumbFunc(&Sym)) Value |= 1; } @@ -598,7 +600,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the basic blocks of the same function. Thus, we would like to resolve // the offset when the destination has the same MCFragment. if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + const MCSymbol &Sym = A->getSymbol(); const MCSymbolData &SymData = Asm.getSymbolData(Sym); IsResolved = (SymData.getFragment() == DF); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index f4f1082..4fa8c79 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -47,6 +47,8 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; + unsigned getRelaxedOpcode(unsigned Op) const; + bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 3bd7ab7..ebef789 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -23,7 +23,7 @@ public: HasDataInCodeSupport = true; } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index 4efd325..263c4c4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -18,7 +18,7 @@ public: ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle) : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createARMELFObjectWriter(OS, OSABI, isLittle()); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 33be347..f2c4358 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -17,7 +17,7 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend { public: ARMAsmBackendWinCOFF(const Target &T, StringRef Triple) : ARMAsmBackend(T, Triple, true) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter 
*createObjectWriter(raw_pwrite_stream &OS) const override { return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index a821a6b..f4fedee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -32,7 +32,7 @@ namespace { public: ARMELFObjectWriter(uint8_t OSABI); - virtual ~ARMELFObjectWriter(); + ~ARMELFObjectWriter() override; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -81,7 +81,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, unsigned Type = 0; if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("Unimplemented"); + default: + report_fatal_error("unsupported relocation on symbol"); + return ELF::R_ARM_NONE; case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -147,7 +149,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } } else { switch ((unsigned)Fixup.getKind()) { - default: llvm_unreachable("invalid fixup kind!"); + default: + report_fatal_error("unsupported relocation on symbol"); + return ELF::R_ARM_NONE; case FK_Data_1: switch (Modifier) { default: llvm_unreachable("unsupported Modifier"); @@ -247,7 +251,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return Type; } -MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian) { MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 9648ffa..e7c777e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -472,7 +472,7 @@ class ARMELFStreamer : public MCELFStreamer { public: friend class ARMTargetELFStreamer; - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool IsThumb) : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) { @@ -1083,14 +1083,13 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSectionELF *EHSection = nullptr; - if (const MCSymbol *Group = FnSection.getGroup()) { - EHSection = - getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP, - FnSection.getEntrySize(), Group->getName()); - } else { - EHSection = getContext().getELFSection(EHSecName, Type, Flags); - } + const MCSymbol *Group = FnSection.getGroup(); + if (Group) + Flags |= ELF::SHF_GROUP; + const MCSectionELF *EHSection = + getContext().getELFSection(EHSecName, Type, Flags, 0, Group, + FnSection.getUniqueID(), nullptr, &FnSection); + assert(EHSection && "Failed to get the required EH section"); // Switch to .ARM.extab or .ARM.exidx section @@ -1383,8 +1382,9 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, } MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool IsThumb) { + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, 
IsThumb); // FIXME: This should eventually end up somewhere else where more // intelligent flag decisions can be made. For now we are just maintaining diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index e48cabb..6b650f0 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -48,7 +48,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) { } - ~ARMMCCodeEmitter() {} + ~ARMMCCodeEmitter() override {} bool isThumb(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & ARM::ModeThumb) != 0; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 7ff7f9a..daa8af2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -255,7 +255,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -310,27 +310,26 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, T.getArch() == Triple::thumb); } static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll, bool DWARFMustBeAtTheEnd) { return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd); } -static MCInstPrinter *createARMMCInstPrinter(const Target &T, +static MCInstPrinter *createARMMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, MII, MRI, STI); + return new ARMInstPrinter(MAI, MII, MRI); return nullptr; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 7e9ba66..24ca567 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -34,6 +34,7 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheARMLETarget, TheThumbLETarget; extern Target TheARMBETarget, TheThumbBETarget; @@ -41,9 +42,8 @@ extern Target TheARMBETarget, TheThumbBETarget; namespace ARM_MC { std::string ParseARMTriple(StringRef TT, StringRef CPU); - /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance. - /// This is exposed so Asm parser, etc. do not need to go through - /// TargetRegistry. + /// Create an ARM MCSubtargetInfo instance. This is exposed so Asm parser, etc. + /// do not need to go through TargetRegistry. MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } @@ -83,24 +83,23 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI
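// The raw_ostream -> raw_pwrite_stream change threaded through these creation
// functions exists because object writers must back-patch bytes they emitted
// earlier (section sizes, symbol counts) once final layout is known, and
// raw_pwrite_stream adds pwrite() for exactly that. A minimal sketch of the
// idiom, not code from this commit (the placeholder layout is made up, and a
// real writer would use the endian helpers rather than a host-order integer):
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
static void writeWithBackpatch(llvm::raw_pwrite_stream &OS) {
  OS.write("\0\0\0\0", 4);  // placeholder for a 32-bit size field
  OS << "object file body"; // stream the real contents
  uint32_t Size = static_cast<uint32_t>(OS.tell());
  // Seek back and fill in the size field at offset 0.
  OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size), 0);
}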
// Construct a PE/COFF machine code streamer which will generate a PE/COFF // object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); -/// createARMELFObjectWriter - Construct an ELF Mach-O object writer. -MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, +/// Construct an ELF object writer. +MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian); -/// createARMMachObjectWriter - Construct an ARM Mach-O object writer. -MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, +/// Construct an ARM Mach-O object writer. +MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); -/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer. -MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); +/// Construct an ARM PE/COFF object writer. +MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); -/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. +/// Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); } // End llvm namespace diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 3187d36..b1f9b58 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -88,6 +88,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_br: return false; // Handle 24-bit branch kinds. @@ -101,12 +102,6 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(4); return true; - // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br: - RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22); - Log2Size = llvm::Log2_32(2); - return true; - case ARM::fixup_t2_uncondbranch: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: @@ -477,9 +472,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit, CPUType, diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 2fd6445..166c04b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -26,7 +26,7 @@ public: : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { assert(!Is64Bit && "AArch64 support not yet implemented"); } - virtual ~ARMWinCOFFObjectWriter() { } + ~ARMWinCOFFObjectWriter() override {} unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -82,7 +82,8 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { } namespace llvm { -MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) { +MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index dc707dc..b993b1b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -16,8 +16,8 @@ namespace { class ARMWinCOFFStreamer : public MCWinCOFFStreamer { public: ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, - raw_ostream &OS) - : MCWinCOFFStreamer(C, AB, CE, OS) { } + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) {} void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitThumbFunc(MCSymbol *Symbol) override; @@ -38,7 +38,8 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { } MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index b91b0e1..b2599fe 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -132,7 +132,7 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) { } case ISD::FrameIndex: { - int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); EVT VT = Node->getValueType(0); SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); unsigned Opc = BPF::MOV_rr; diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index d94416b..37f9164 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -63,11 +63,11 @@ public: std::string Str; raw_string_ostream OS(Str); - if (DLoc.isUnknown() == false) { - DILocation DIL(DLoc.getAsMDNode(Fn.getContext())); - StringRef Filename = 
DIL.getFilename(); - unsigned Line = DIL.getLineNumber(); - unsigned Column = DIL.getColumnNumber(); + if (DLoc) { + auto DIL = DLoc.get(); + StringRef Filename = DIL->getFilename(); + unsigned Line = DIL->getLine(); + unsigned Column = DIL->getColumn(); OS << Filename << ':' << Line << ':' << Column << ' '; } @@ -137,7 +137,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom); @@ -538,12 +537,10 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op, MachineBasicBlock * BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - unsigned Opc = MI->getOpcode(); - const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - assert(Opc == BPF::Select && "Unexpected instr type to insert"); + assert(MI->getOpcode() == BPF::Select && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 47001f0..26b2cfe 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -231,8 +231,6 @@ class MOV_RI<string OpcodeStr> let BPFSrc = 0; // BPF_K let BPFClass = 7; // BPF_ALU64 } -def MOV_rr : MOV_RR<"mov">; -def MOV_ri : MOV_RI<"mov">; class LD_IMM64<bits<4> Pseudo, string OpcodeStr> : InstBPF<(outs GPR:$dst), (ins u64imm:$imm), @@ -255,7 +253,35 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr> let size = 3; // BPF_DW let BPFClass = 0; // BPF_LD } + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def LD_imm64 : LD_IMM64<0, "ld_64">; +def MOV_rr : MOV_RR<"mov">; +def MOV_ri : MOV_RI<"mov">; +} + +def LD_pseudo + : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm), + "ld_pseudo\t$dst, $pseudo, $imm", + [(set GPR:$dst, (int_bpf_pseudo imm:$pseudo, imm:$imm))]> { + + bits<3> mode; + bits<2> size; + bits<4> dst; + bits<64> imm; + bits<4> pseudo; + + let Inst{63-61} = mode; + let Inst{60-59} = size; + let Inst{51-48} = dst; + let Inst{55-52} = pseudo; + let Inst{47-32} = 0; + let Inst{31-0} = imm{31-0}; + + let mode = 0; // BPF_IMM + let size = 3; // BPF_DW + let BPFClass = 0; // BPF_LD +} // STORE instructions class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern> @@ -461,6 +487,33 @@ def XADD64 : XADD<3, "xadd64", atomic_load_add_64>; // undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>; } +// bswap16, bswap32, bswap64 +class BSWAP<bits<32> SizeOp, string OpcodeStr, list<dag> Pattern> + : InstBPF<(outs GPR:$dst), (ins GPR:$src), + !strconcat(OpcodeStr, "\t$dst"), + Pattern> { + bits<4> op; + bits<1> BPFSrc; + bits<4> dst; + bits<32> imm; + + let Inst{63-60} = op; + let Inst{59} = BPFSrc; + let Inst{51-48} = dst; + let Inst{31-0} = imm; + + let op = 0xd; // BPF_END + let BPFSrc = 1; // BPF_TO_BE (TODO: use BPF_TO_LE for big-endian target) + let BPFClass = 4; // BPF_ALU + let imm = SizeOp; +} + +let Constraints = "$dst = $src" in { +def BSWAP16 : BSWAP<16, "bswap16", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>; +def BSWAP32 : BSWAP<32, "bswap32", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>; +def BSWAP64 : 
BSWAP<64, "bswap64", [(set GPR:$dst, (bswap GPR:$src))]>; +} + let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1, hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in { class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode> diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index 3f09379..05f6d82 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "BPFGenAsmWriter.inc" void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h index d7c2899..adcaff6 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h @@ -25,7 +25,8 @@ public: const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = nullptr); void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O, diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 87c8077..8393135 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -31,7 +31,7 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -71,7 +71,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, *(uint16_t *)&Data[Fixup.getOffset() + 2] = (uint16_t)((Value - 8) / 8); } -MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_ostream &OS) const { +MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createBPFELFObjectWriter(OS, 0); } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 169a8a7..a5562c1 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -47,7 +47,7 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI) { +MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp index 9c51d66..32d2ef5 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp @@ -76,9 +76,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI, assert(MO.isExpr()); const MCExpr *Expr = MO.getExpr(); - MCExpr::ExprKind Kind = Expr->getKind(); - assert(Kind == MCExpr::SymbolRef); + 
assert(Expr->getKind() == MCExpr::SymbolRef); if (MI.getOpcode() == BPF::JAL) // func call name @@ -125,7 +124,7 @@ void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted unsigned CurByte = 0; - if (Opcode == BPF::LD_imm64) { + if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) { uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI); EmitByte(Value >> 56, CurByte, OS); EmitByte(((Value >> 48) & 0xff), CurByte, OS); diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index fd04001..95f0b02 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -63,15 +63,16 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } -static MCInstPrinter * -createBPFMCInstPrinter(const Target &T, unsigned SyntaxVariant, - const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { +static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new BPFInstPrinter(MAI, MII, MRI); return 0; diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 1fd2bec..ce08b7c 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -28,6 +28,7 @@ class MCSubtargetInfo; class Target; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheBPFTarget; @@ -38,7 +39,7 @@ MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // Defines symbolic names for BPF registers. This defines a mapping from diff --git a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt index 209d17c..8bca2e3 100644 --- a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = BPFDesc parent = BPF -required_libraries = MC BPFAsmPrinter BPFInfo +required_libraries = MC BPFAsmPrinter BPFInfo Support add_to_library_groups = BPF diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index d0e2010..f1a7127 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -15,6 +15,7 @@ #include "CPPTargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -91,6 +92,7 @@ namespace { /// CppWriter - This class is the main chunk of code that converts an LLVM /// module to a C++ translation unit. 
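// The CppWriter hunks below change the C++ backend from borrowing its
// formatted_raw_ostream to owning it: addPassesToEmitFile now wraps the
// raw_pwrite_stream it receives and hands ownership of the wrapper to the
// pass, so the stream cannot go away while the pass still holds a reference.
// The ownership pattern in isolation (a sketch; the 'Emitter' name is made
// up, not part of the backend):
#include "llvm/Support/FormattedStream.h"
#include <memory>
#include <utility>
struct Emitter {
  std::unique_ptr<llvm::formatted_raw_ostream> OutOwner; // owns the stream
  llvm::formatted_raw_ostream &Out; // alias so existing uses stay unchanged
  explicit Emitter(std::unique_ptr<llvm::formatted_raw_ostream> O)
      : OutOwner(std::move(O)), Out(*OutOwner) {}
};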
class CppWriter : public ModulePass { + std::unique_ptr<formatted_raw_ostream> OutOwner; formatted_raw_ostream &Out; const Module *TheModule; uint64_t uniqueNum; @@ -105,8 +107,9 @@ namespace { public: static char ID; - explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} + explicit CppWriter(std::unique_ptr<formatted_raw_ostream> o) + : ModulePass(ID), OutOwner(std::move(o)), Out(*OutOwner), uniqueNum(0), + is_inline(false), indent_level(0) {} const char *getPassName() const override { return "C++ backend"; } @@ -1721,7 +1724,7 @@ void CppWriter::printFunctionUses(const Function* F) { // initializers. if (GenerationType != GenFunction) { nl(Out) << "// Global Variable Definitions"; nl(Out); - for (const auto &GV : gvs) { + for (auto *GV : gvs) { if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) printVariableBody(Var); } @@ -2146,13 +2149,12 @@ char CppWriter::ID = 0; // External Interface declaration //===----------------------------------------------------------------------===// -bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter) { - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - PM.add(new CppWriter(o)); +bool CPPTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + if (FileType != TargetMachine::CGFT_AssemblyFile) + return true; + auto FOut = llvm::make_unique<formatted_raw_ostream>(o); + PM.add(new CppWriter(std::move(FOut))); return false; } diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 678a932..02d705e 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -29,7 +29,7 @@ struct CPPTargetMachine : public TargetMachine { : TargetMachine(T, "", TT, CPU, FS, Options) {} public: - bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, + bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) override; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index c6ffb96..758ccc7 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_target(HexagonCodeGen HexagonAsmPrinter.cpp HexagonCFGOptimizer.cpp HexagonCopyToCombine.cpp + HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 180762f..f0c81e0 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -222,21 +222,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return(new HexagonInstPrinter(MAI, MII, MRI)); - else - return nullptr; -} - extern "C" void LLVMInitializeHexagonAsmPrinter() { RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); - - TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, - 
createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp new file mode 100644 index 0000000..37ed173 --- /dev/null +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -0,0 +1,1348 @@ +// Replace mux instructions with the corresponding legal instructions. +// It is meant to work post-SSA, but still on virtual registers. It was +// originally placed between register coalescing and machine instruction +// scheduler. +// In this place in the optimization sequence, live interval analysis had +// been performed, and the live intervals should be preserved. A large part +// of the code deals with preserving the liveness information. +// +// Liveness tracking aside, the main functionality of this pass is divided +// into two steps. The first step is to replace an instruction +// vreg0 = C2_mux vreg0, vreg1, vreg2 +// with a pair of conditional transfers +// vreg0 = A2_tfrt vreg0, vreg1 +// vreg0 = A2_tfrf vreg0, vreg2 +// It is the intention that the execution of this pass could be terminated +// after this step, and the code generated would be functionally correct. +// +// If the uses of the source values vreg1 and vreg2 are kills, and their +// definitions are predicable, then in the second step, the conditional +// transfers will then be rewritten as predicated instructions. E.g. +// vreg0 = A2_or vreg1, vreg2 +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// will be rewritten as +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// This replacement has two variants: "up" and "down". Consider this case: +// vreg0 = A2_or vreg1, vreg2 +// ... [intervening instructions] ... +// vreg3 = A2_tfrt vreg99, vreg0<kill> +// variant "up": +// vreg3 = A2_port vreg99, vreg1, vreg2 +// ... [intervening instructions, vreg0->vreg3] ... +// [deleted] +// variant "down": +// [deleted] +// ... [intervening instructions] ... +// vreg3 = A2_port vreg99, vreg1, vreg2 +// +// Both, one or none of these variants may be valid, and checks are made +// to rule out inapplicable variants. +// +// As an additional optimization, before either of the two steps above is +// executed, the pass attempts to coalesce the target register with one of +// the source registers, e.g. given an instruction +// vreg3 = C2_mux vreg0, vreg1, vreg2 +// vreg3 will be coalesced with either vreg1 or vreg2. 
If this succeeds, +// the instruction would then be (for example) +// vreg3 = C2_mux vreg0, vreg3, vreg2 +// and, under certain circumstances, this could result in only one predicated +// instruction: +// vreg3 = A2_tfrf vreg0, vreg2 +// + +#define DEBUG_TYPE "expand-condsets" +#include "HexagonTargetMachine.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions")); +static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit", + cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings")); + +namespace llvm { + void initializeHexagonExpandCondsetsPass(PassRegistry&); + FunctionPass *createHexagonExpandCondsets(); +} + +namespace { + class HexagonExpandCondsets : public MachineFunctionPass { + public: + static char ID; + HexagonExpandCondsets() : + MachineFunctionPass(ID), HII(0), TRI(0), MRI(0), + LIS(0), CoaLimitActive(false), + TfrLimitActive(false), CoaCounter(0), TfrCounter(0) { + if (OptCoaLimit.getPosition()) + CoaLimitActive = true, CoaLimit = OptCoaLimit; + if (OptTfrLimit.getPosition()) + TfrLimitActive = true, TfrLimit = OptTfrLimit; + initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + } + + virtual const char *getPassName() const { + return "Hexagon Expand Condsets"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + const HexagonInstrInfo *HII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + + bool CoaLimitActive, TfrLimitActive; + unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; + + struct RegisterRef { + RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()), + Sub(Op.getSubReg()) {} + RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {} + bool operator== (RegisterRef RR) const { + return Reg == RR.Reg && Sub == RR.Sub; + } + bool operator!= (RegisterRef RR) const { return !operator==(RR); } + unsigned Reg, Sub; + }; + + typedef DenseMap<unsigned,unsigned> ReferenceMap; + enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; + enum { Exec_Then = 0x10, Exec_Else = 0x20 }; + unsigned getMaskForSub(unsigned Sub); + bool isCondset(const MachineInstr *MI); + + void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec); + bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec); + + LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S); + LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S); + void makeDefined(unsigned Reg, SlotIndex S, bool SetDef); + void makeUndead(unsigned Reg, SlotIndex S); + void shrinkToUses(unsigned Reg, LiveInterval &LI); + void updateKillFlags(unsigned Reg, LiveInterval &LI); + void terminateSegment(LiveInterval::iterator LT, SlotIndex S, + LiveInterval &LI); + void 
addInstrToLiveness(MachineInstr *MI); + void removeInstrFromLiveness(MachineInstr *MI); + + unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond); + MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR, + unsigned DstSR, const MachineOperand &PredOp, bool Cond); + bool split(MachineInstr *MI); + bool splitInBlock(MachineBasicBlock &B); + + bool isPredicable(MachineInstr *MI); + MachineInstr *getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond); + bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses); + bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown); + void predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond); + void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR, + bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last); + bool predicate(MachineInstr *TfrI, bool Cond); + bool predicateInBlock(MachineBasicBlock &B); + + void postprocessUndefImplicitUses(MachineBasicBlock &B); + void removeImplicitUses(MachineInstr *MI); + void removeImplicitUses(MachineBasicBlock &B); + + bool isIntReg(RegisterRef RR, unsigned &BW); + bool isIntraBlocks(LiveInterval &LI); + bool coalesceRegisters(RegisterRef R1, RegisterRef R2); + bool coalesceSegments(MachineFunction &MF); + }; +} + +char HexagonExpandCondsets::ID = 0; + + +unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) { + switch (Sub) { + case Hexagon::subreg_loreg: + return Sub_Low; + case Hexagon::subreg_hireg: + return Sub_High; + case Hexagon::NoSubRegister: + return Sub_None; + } + llvm_unreachable("Invalid subregister"); +} + + +bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_mux: + case Hexagon::C2_muxii: + case Hexagon::C2_muxir: + case Hexagon::C2_muxri: + case Hexagon::MUX64_rr: + return true; + break; + } + return false; +} + + +void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + Map.insert(std::make_pair(RR.Reg, Mask)); + else + F->second |= Mask; +} + + +bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map, + unsigned Exec) { + ReferenceMap::iterator F = Map.find(RR.Reg); + if (F == Map.end()) + return false; + unsigned Mask = getMaskForSub(RR.Sub) | Exec; + if (Mask & F->second) + return true; + return false; +} + + +LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI, + SlotIndex S) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->start >= S) + return I; + } + return LI.end(); +} + + +LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI, + SlotIndex S) { + LiveInterval::iterator P = LI.end(); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (I->end > S) + return P; + P = I; + } + return P; +} + + +/// Find the implicit use of register Reg in slot index S, and make sure +/// that the "defined" flag is set to SetDef. While the mux expansion is +/// going on, predicated instructions will have implicit uses of the +/// registers that are being defined. This is to keep any preceding +/// definitions live. If there is no preceding definition, the implicit +/// use will be marked as "undef", otherwise it will be "defined". 
This +/// function is used to update the flag. +void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S, + bool SetDef) { + if (!S.isRegister()) + return; + MachineInstr *MI = LIS->getInstructionFromIndex(S); + assert(MI && "Expecting instruction"); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + bool IsDef = !Op.isUndef(); + if (Op.isImplicit() && IsDef != SetDef) + Op.setIsUndef(!SetDef); + } +} + + +void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) { + // If S is a block boundary, then there can still be a dead def reaching + // this point. Instead of traversing the CFG, queue start points of all + // live segments that begin with a register, and end at a block boundary. + // This may "resurrect" some truly dead definitions, but doing so is + // harmless. + SmallVector<MachineInstr*,8> Defs; + if (S.isBlock()) { + LiveInterval &LI = LIS->getInterval(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isRegister() || !I->end.isBlock()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(I->start); + Defs.push_back(MI); + } + } else if (S.isRegister()) { + MachineInstr *MI = LIS->getInstructionFromIndex(S); + Defs.push_back(MI); + } + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + MachineInstr *MI = Defs[i]; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + } +} + + +/// Shrink the segments in the live interval for a given register to the last +/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this +/// function will not mark any definitions of Reg as dead. The reason for this +/// is that this function is used while a MUX instruction is being expanded, +/// or while a conditional copy is undergoing predication. During these +/// processes, there may be defs present in the instruction sequence that have +/// not yet been removed, or there may be missing uses that have not yet been +/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible, +/// but since it does not extend any intervals that are too short, we need to +/// pre-emptively extend them here in anticipation of further changes. +void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) { + SmallVector<MachineInstr*,4> Deads; + LIS->shrinkToUses(&LI, &Deads); + // Need to undo the deadification made by "shrinkToUses". It's easier to + // do it here, since we have a list of all instructions that were just + // marked as dead. + for (unsigned i = 0, n = Deads.size(); i < n; ++i) { + MachineInstr *MI = Deads[i]; + // Clear the "dead" flag. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + Op.setIsDead(false); + } + // Extend the live segment to the beginning of the next one. + LiveInterval::iterator End = LI.end(); + SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot(); + LiveInterval::iterator T = LI.FindSegmentContaining(S); + assert(T != End); + LiveInterval::iterator N = std::next(T); + if (N != End) + T->end = N->start; + else + T->end = LIS->getMBBEndIdx(MI->getParent()); + } + updateKillFlags(Reg, LI); +} + + +/// Given an updated live interval LI for register Reg, update the kill flags +/// in instructions using Reg to reflect the liveness changes. 
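/// For intuition (the indices are illustrative, not from a real dump): if the
/// updated interval for %vreg7 is [32r,64r)[96r,128B), the use of %vreg7 at
/// index 64 receives a kill flag, because that segment ends at a register
/// slot; nothing is marked at 128B, a block boundary, where the value simply
/// stays live across the edge.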
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) { + MRI->clearKillFlags(Reg); + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + SlotIndex EX = I->end; + if (!EX.isRegister()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(EX); + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + continue; + // Only set the kill flag on the first encountered use of Reg in this + // instruction. + Op.setIsKill(true); + break; + } + } +} + + +/// When adding a new instruction to liveness, the newly added definition +/// will start a new live segment. This may happen at a position that falls +/// within an existing live segment. In such a case, that live segment needs to +/// be truncated to make room for the new segment. Ultimately, the truncation +/// will occur at the last use, but for now the segment can be terminated +/// right at the place where the new segment will start. The segments will be +/// shrunk-to-uses later. +void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT, + SlotIndex S, LiveInterval &LI) { + // Terminate the live segment pointed to by LT within a live interval LI. + if (LT == LI.end()) + return; + + VNInfo *OldVN = LT->valno; + SlotIndex EX = LT->end; + LT->end = S; + // If LT does not end at a block boundary, the termination is done. + if (!EX.isBlock()) + return; + + // If LT ended at a block boundary, it's possible that its value number + // is picked up at the beginning of other blocks. Create a new value number + // and change such blocks to use it instead. + VNInfo *NewVN = 0; + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + if (!I->start.isBlock() || I->valno != OldVN) + continue; + // Generate on-demand a new value number that is defined by the + // block beginning (i.e. -phi). + if (!NewVN) + NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator()); + I->valno = NewVN; + } +} + + +/// Add the specified instruction to live intervals. This function is used +/// to update the live intervals while the program code is being changed. +/// Neither the expansion of a MUX nor the predication is atomic, and this +/// function is used to update the live intervals while these transformations +/// are being done. +void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI) + : LIS->getInstructionIndex(MI); + DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI); + + MX = MX.getRegSlot(); + bool Predicated = HII->isPredicated(MI); + MachineBasicBlock *MB = MI->getParent(); + + // Strip all implicit uses from predicated instructions. They will be + // added again, according to the updated information. + if (Predicated) + removeImplicitUses(MI); + + // For each def in MI we need to insert a new live segment starting at MX + // into the interval. If there already exists a live segment in the interval + // that contains MX, we need to terminate it at MX. + SmallVector<RegisterRef,2> Defs; + for (auto &Op : MI->operands()) + if (Op.isReg() && Op.isDef()) + Defs.push_back(RegisterRef(Op)); + + for (unsigned i = 0, n = Defs.size(); i < n; ++i) { + unsigned DefR = Defs[i].Reg; + LiveInterval &LID = LIS->getInterval(DefR); + DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + // If MX falls inside of an existing live segment, terminate it.
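// (Illustration with made-up indices: inserting a def at 64r into an
// interval that holds [32r,96r) first truncates that segment to [32r,64r);
// the code below then adds a new segment starting at 64r, and both pieces
// are shrunk-to-uses afterwards.)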
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + if (LT != LID.end()) + terminateSegment(LT, MX, LID); + DEBUG(dbgs() << "after terminating segment\n " << LID << "\n"); + + // Create a new segment starting from MX. + LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX); + SlotIndex EX; + VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator()); + if (N == LID.end()) { + // There is no live segment after MX. End this segment at the end of + // the block. + EX = LIS->getMBBEndIdx(MB); + } else { + // If the next segment starts at the block boundary, end the new segment + // at the boundary of the preceding block (i.e. the previous index). + // Otherwise, end the segment at the beginning of the next segment. In + // either case it will be "shrunk-to-uses" later. + EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start; + } + if (Predicated) { + // Predicated instruction will have an implicit use of the defined + // register. This is necessary so that this definition will not make + // any previous definitions dead. If there are no previous live + // segments, still add the implicit use, but make it "undef". + // Because of the implicit use, the preceding definition is not + // dead. Mark it as such (if necessary). + MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true); + ImpUse.setSubReg(Defs[i].Sub); + bool Undef = false; + if (P == LID.end()) + Undef = true; + else { + // If the previous segment extends to the end of the previous block, + // the end index may actually be the beginning of this block. If + // the previous segment ends at a block boundary, move it back by one, + // to get the proper block for it. + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) + Undef = true; + } + if (!Undef) { + makeUndead(DefR, P->valno->def); + // We are adding a live use, so extend the previous segment to + // include it. + P->end = MX; + } else { + ImpUse.setIsUndef(true); + } + + if (!MI->readsRegister(DefR)) + MI->addOperand(ImpUse); + if (N != LID.end()) + makeDefined(DefR, N->start, true); + } + LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN); + LID.addSegment(NR); + DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n"); + shrinkToUses(DefR, LID); + DEBUG(dbgs() << "updated imp-uses: " << *MI); + LID.verify(); + } + + // For each use in MI: + // - If there is no live segment that contains MX for the used register, + // extend the previous one. Ignore implicit uses. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef()) + continue; + unsigned UseR = Op.getReg(); + LiveInterval &LIU = LIS->getInterval(UseR); + // Find the last segment P that starts before MX. + LiveInterval::iterator P = LIU.FindSegmentContaining(MX); + if (P == LIU.end()) + P = prevSegment(LIU, MX); + + assert(P != LIU.end() && "MI uses undefined register?"); + SlotIndex EX = P->end; + // If P contains MX, there is not much to do. + if (EX > MX) { + Op.setIsKill(false); + continue; + } + // Otherwise, extend P to "next(MX)". + P->end = MX.getNextIndex(); + Op.setIsKill(true); + // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) + KI->clearRegisterKills(UseR, nullptr); + shrinkToUses(UseR, LIU); + LIU.verify(); + } +} + + +/// Update the live interval information to reflect the removal of the given +/// instruction from the program. As with "addInstrToLiveness", this function +/// is called while the program code is being changed. +void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) { + SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot(); + DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI); + + // For each def in MI: + // If MI starts a live segment, merge this segment with the previous segment. + // + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + unsigned DefR = Op.getReg(); + LiveInterval &LID = LIS->getInterval(DefR); + LiveInterval::iterator LT = LID.FindSegmentContaining(MX); + assert(LT != LID.end() && "Expecting live segments"); + DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI) + << " with interval\n " << LID << "\n"); + if (LT->start != MX) + continue; + + VNInfo *MVN = LT->valno; + if (LT != LID.begin()) { + // If the current live segment is not the first, the task is easy. If + // the previous segment continues into the current block, extend it to + // the end of the current one, and merge the value numbers. + // Otherwise, remove the current segment, and make the end of it "undef". + LiveInterval::iterator P = std::prev(LT); + SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end; + MachineBasicBlock *MB = MI->getParent(); + MachineBasicBlock *PB = LIS->getMBBFromIndex(PE); + if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) { + makeDefined(DefR, LT->end, false); + LID.removeSegment(*LT); + } else { + // Make the segments adjacent, so that merge-vn can also merge the + // segments. + P->end = LT->start; + makeUndead(DefR, P->valno->def); + LID.MergeValueNumberInto(MVN, P->valno); + } + } else { + LiveInterval::iterator N = std::next(LT); + LiveInterval::iterator RmB = LT, RmE = N; + while (N != LID.end()) { + // Iterate until the first register-based definition is found + // (i.e. skip all block-boundary entries). + LiveInterval::iterator Next = std::next(N); + if (N->start.isRegister()) { + makeDefined(DefR, N->start, false); + break; + } + if (N->end.isRegister()) { + makeDefined(DefR, N->end, false); + RmE = Next; + break; + } + RmE = Next; + N = Next; + } + // Erase the segments in one shot to avoid invalidating iterators. + LID.segments.erase(RmB, RmE); + } + + bool VNUsed = false; + for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) { + if (I->valno != MVN) + continue; + VNUsed = true; + break; + } + if (!VNUsed) + MVN->markUnused(); + + DEBUG(dbgs() << "new interval: "); + if (!LID.empty()) { + DEBUG(dbgs() << LID << "\n"); + LID.verify(); + } else { + DEBUG(dbgs() << "<empty>\n"); + LIS->removeInterval(DefR); + } + } + + // For uses there is nothing to do. The intervals will be updated via + // shrinkToUses. 
+ SmallVector<unsigned,4> Uses; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.push_back(R); + } + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + for (unsigned i = 0, n = Uses.size(); i < n; ++i) { + LiveInterval &LI = LIS->getInterval(Uses[i]); + shrinkToUses(Uses[i], LI); + } +} + + +/// Get the opcode for a conditional transfer of the value in SO (source +/// operand). The condition (true/false) is given in Cond. +unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, + bool Cond) { + using namespace Hexagon; + if (SO.isReg()) { + unsigned PhysR; + RegisterRef RS = SO; + if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) { + const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg)); + PhysR = RS.Reg; + } + unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); + switch (RC->getSize()) { + case 4: + return Cond ? A2_tfrt : A2_tfrf; + case 8: + return Cond ? A2_tfrpt : A2_tfrpf; + } + llvm_unreachable("Invalid register operand"); + } + if (SO.isImm() || SO.isFPImm()) + return Cond ? C2_cmoveit : C2_cmoveif; + llvm_unreachable("Unexpected source operand"); +} + + +/// Generate a conditional transfer, copying the value SrcOp to the +/// destination register DstR:DstSR, and using the predicate register from +/// PredOp. The Cond argument specifies whether the predicate is to be +/// if(PredOp), or if(!PredOp). +MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp, + unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) { + MachineInstr *MI = SrcOp.getParent(); + MachineBasicBlock &B = *MI->getParent(); + MachineBasicBlock::iterator At = MI; + DebugLoc DL = MI->getDebugLoc(); + + // Don't avoid identity copies here (i.e. if the source and the destination + // are the same registers). It is actually better to generate them here, + // since this would cause the copy to potentially be predicated in the next + // step. The predication will remove such a copy if it is unable to + // predicate. + + unsigned Opc = getCondTfrOpcode(SrcOp, Cond); + MachineInstr *TfrI = BuildMI(B, At, DL, HII->get(Opc)) + .addReg(DstR, RegState::Define, DstSR) + .addOperand(PredOp) + .addOperand(SrcOp); + // We don't want any kills yet. + TfrI->clearKillInfo(); + DEBUG(dbgs() << "created an initial copy: " << *TfrI); + return TfrI; +} + + +/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function +/// performs all necessary changes to complete the replacement. +bool HexagonExpandCondsets::split(MachineInstr *MI) { + if (TfrLimitActive) { + if (TfrCounter >= TfrLimit) + return false; + TfrCounter++; + } + DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber() + << ": " << *MI); + MachineOperand &MD = MI->getOperand(0); // Definition + MachineOperand &MP = MI->getOperand(1); // Predicate register + assert(MD.isDef()); + unsigned DR = MD.getReg(), DSR = MD.getSubReg(); + + // First, create the two individual conditional transfers, and add each + // of them to the live intervals information. Do that first and then remove + // the old instruction from live intervals.
+ if (MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true)) + addInstrToLiveness(TfrT); + if (MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false)) + addInstrToLiveness(TfrF); + removeInstrFromLiveness(MI); + + return true; +} + + +/// Split all MUX instructions in the given block into pairs of conditional +/// transfers. +bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + if (isCondset(I)) + Changed |= split(I); + } + return Changed; +} + + +bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) { + if (HII->isPredicated(MI) || !HII->isPredicable(MI)) + return false; + if (MI->hasUnmodeledSideEffects() || MI->mayStore()) + return false; + // Reject instructions with multiple defs (e.g. post-increment loads). + bool HasDef = false; + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + if (HasDef) + return false; + HasDef = true; + } + for (auto &Mo : MI->memoperands()) + if (Mo->isVolatile()) + return false; + return true; +} + + +/// Find the reaching definition for a predicated use of RD. The RD is used +/// under the conditions given by PredR and Cond, and this function will ignore +/// definitions that set RD under the opposite conditions. +MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD, + MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) { + MachineBasicBlock &B = *UseIt->getParent(); + MachineBasicBlock::iterator I = UseIt, S = B.begin(); + if (I == S) + return 0; + + bool PredValid = true; + do { + --I; + MachineInstr *MI = &*I; + // Check if this instruction can be ignored, i.e. if it is predicated + // on the complementary condition. + if (PredValid && HII->isPredicated(MI)) { + if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI))) + continue; + } + + // Check the defs. If the PredR is defined, invalidate it. If RD is + // defined, return the instruction or 0, depending on the circumstances. + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + RegisterRef RR = Op; + if (RR.Reg == PredR) { + PredValid = false; + continue; + } + if (RR.Reg != RD.Reg) + continue; + // If the "Reg" part agrees, there is still the subregister to check. + // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but + // not vreg1 (w/o subregisters). + if (RR.Sub == RD.Sub) + return MI; + if (RR.Sub == 0 || RD.Sub == 0) + return 0; + // We have different subregisters, so we can continue looking. + } + } while (I != S); + + return 0; +} + + +/// Check if the instruction MI can be safely moved over a set of instructions +/// whose side-effects (in terms of register defs and uses) are expressed in +/// the maps Defs and Uses. These maps reflect the conditional defs and uses +/// that depend on the same predicate register to allow moving instructions +/// over instructions predicated on the opposite condition. +bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs, + ReferenceMap &Uses) { + // In order to be able to safely move MI over instructions that define + // "Defs" and use "Uses", no def operand from MI can be defined or used + // and no use operand can be defined. + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + RegisterRef RR = Op; + // For physical registers we would need to check register aliases, etc.
+ // and we don't want to bother with that. It would be of little value + // before the actual register rewriting (from virtual to physical). + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + // No redefs for any operand. + if (isRefInMap(RR, Defs, Exec_Then)) + return false; + // For defs, there cannot be uses. + if (Op.isDef() && isRefInMap(RR, Uses, Exec_Then)) + return false; + } + return true; +} + + +/// Check if the instruction accessing memory (TheI) can be moved to the +/// location ToI. +bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI, + bool IsDown) { + bool IsLoad = TheI->mayLoad(), IsStore = TheI->mayStore(); + if (!IsLoad && !IsStore) + return true; + if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI)) + return true; + if (TheI->hasUnmodeledSideEffects()) + return false; + + MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI; + MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI; + bool Ordered = TheI->hasOrderedMemoryRef(); + + // Search for aliased memory reference in (StartI, EndI). + for (MachineBasicBlock::iterator I = std::next(StartI); I != EndI; ++I) { + MachineInstr *MI = &*I; + if (MI->hasUnmodeledSideEffects()) + return false; + bool L = MI->mayLoad(), S = MI->mayStore(); + if (!L && !S) + continue; + if (Ordered && MI->hasOrderedMemoryRef()) + return false; + + bool Conflict = (L && IsStore) || S; + if (Conflict) + return false; + } + return true; +} + + +/// Generate a predicated version of MI (where the condition is given via +/// PredR and Cond) at the point indicated by Where. +void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI, + MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) { + // The problem with updating live intervals is that we can move one def + // past another def. In particular, this can happen when moving an A2_tfrt + // over an A2_tfrf defining the same register. From the point of view of + // live intervals, these two instructions are two separate definitions, + // and each one starts another live segment. LiveIntervals's "handleMove" + // does not allow such moves, so we need to handle it ourselves. To avoid + // invalidating liveness data while we are using it, the move will be + // implemented in 4 steps: (1) add a clone of the instruction MI at the + // target location, (2) update liveness, (3) delete the old instruction, + // and (4) update liveness again. + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction. + unsigned Opc = MI->getOpcode(); + unsigned PredOpc = HII->getCondOpcode(Opc, !Cond); + MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc)); + unsigned Ox = 0, NP = MI->getNumOperands(); + // Skip all defs from MI first. + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isDef()) + break; + Ox++; + } + // Add the new def, then the predicate register, then the rest of the + // operands. 
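// (For example, predicating a hypothetical "%vreg3 = A2_add %vreg1, %vreg2"
// on %vregP yields "%vreg3 = A2_paddt %vregP, %vreg1, %vreg2": the def comes
// first, then the predicate register, then the original use operands.)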
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub); + MB.addReg(PredR); + while (Ox < NP) { + MachineOperand &MO = MI->getOperand(Ox); + if (!MO.isReg() || !MO.isImplicit()) + MB.addOperand(MO); + Ox++; + } + + MachineFunction &MF = *B.getParent(); + MachineInstr::mmo_iterator I = MI->memoperands_begin(); + unsigned NR = std::distance(I, MI->memoperands_end()); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR); + for (unsigned i = 0; i < NR; ++i) + MemRefs[i] = *I++; + MB.setMemRefs(MemRefs, MemRefs+NR); + + MachineInstr *NewI = MB; + NewI->clearKillInfo(); + addInstrToLiveness(NewI); +} + + +/// In the range [First, Last], rename all references to the "old" register RO +/// to the "new" register RN, but only in instructions predicated on the given +/// condition. +void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN, + unsigned PredR, bool Cond, MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Last) { + MachineBasicBlock::iterator End = std::next(Last); + for (MachineBasicBlock::iterator I = First; I != End; ++I) { + MachineInstr *MI = &*I; + // Do not touch instructions that are not predicated, or are predicated + // on the opposite condition. + if (!HII->isPredicated(MI)) + continue; + if (!MI->readsRegister(PredR) || (Cond != HII->isPredicatedTrue(MI))) + continue; + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || RO != RegisterRef(Op)) + continue; + Op.setReg(RN.Reg); + Op.setSubReg(RN.Sub); + // In practice, this isn't supposed to see any defs. + assert(!Op.isDef() && "Not expecting a def"); + } + } +} + + +/// For a given conditional copy, predicate the definition of the source of +/// the copy under the given condition (using the same predicate register as +/// the copy). +bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) { + // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi). + unsigned Opc = TfrI->getOpcode(); + (void)Opc; + assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); + DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << *TfrI); + + MachineOperand &MD = TfrI->getOperand(0); + MachineOperand &MP = TfrI->getOperand(1); + MachineOperand &MS = TfrI->getOperand(2); + // The source operand should be a <kill>. This is not strictly necessary, + // but it makes things a lot simpler. Otherwise, we would need to rename + // some registers, which would complicate the transformation considerably. + if (!MS.isKill()) + return false; + + RegisterRef RT(MS); + unsigned PredR = MP.getReg(); + MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond); + if (!DefI || !isPredicable(DefI)) + return false; + + DEBUG(dbgs() << "Source def: " << *DefI); + + // Collect the information about registers defined and used between the + // DefI and the TfrI. + // Map: reg -> bitmask of subregs + ReferenceMap Uses, Defs; + MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI; + + // Check if the predicate register is valid between DefI and TfrI. + // If it is, we can then ignore instructions predicated on the negated + // conditions when collecting def and use information. + bool PredValid = true; + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + if (!I->modifiesRegister(PredR, 0)) + continue; + PredValid = false; + break; + } + + for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) { + MachineInstr *MI = &*I; + // If this instruction is predicated on the same register, it could + // potentially be ignored. 
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+ // registers no longer have subregisters---their super- and
+ // subregisters are other physical registers, and we are not checking
+ // that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+ // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (that does execute together with
+ // TfrI), we will not be able to move DefI down. If there is a use that
+ // executes if TfrI's condition is false, then RT must be available
+ // unconditionally (cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
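The rewrite performed by predicate() and predicateAt() condenses to: pick the conditional opcode for (PredR, Cond), emit the new destination and the predicate register first, then copy the remaining explicit operands. A simplified restatement of that core, with the liveness bookkeeping omitted (it assumes this pass's RegisterRef and the getCondOpcode() helper that this patch adds to HexagonInstrInfo; a sketch, not the pass's actual code):

  // Sketch only: clone MI at Where as a predicated instruction defining RD.
  static MachineInstr *buildPredicatedClone(MachineInstr *MI, RegisterRef RD,
                                            unsigned PredR, bool Cond,
                                            const HexagonInstrInfo *HII,
                                            MachineBasicBlock::iterator Where) {
    MachineBasicBlock &B = *MI->getParent();
    unsigned PredOpc = HII->getCondOpcode(MI->getOpcode(), !Cond);
    MachineInstrBuilder MB =
        BuildMI(B, Where, Where->getDebugLoc(), HII->get(PredOpc));
    MB.addReg(RD.Reg, RegState::Define, RD.Sub); // new destination
    MB.addReg(PredR);                            // predicate operand
    for (unsigned i = 0, n = MI->getNumOperands(); i != n; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef())
        continue;                                // defs are replaced by RD
      if (!MO.isReg() || !MO.isImplicit())
        MB.addOperand(MO);                       // keep explicit operands
    }
    return MB;
  }

+/// Predicate all cases of conditional copies in the specified block.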
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) { + bool Changed = false; + MachineBasicBlock::iterator I, E, NextI; + for (I = B.begin(), E = B.end(); I != E; I = NextI) { + NextI = std::next(I); + unsigned Opc = I->getOpcode(); + if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { + bool Done = predicate(I, (Opc == Hexagon::A2_tfrt)); + if (!Done) { + // If we didn't predicate I, we may need to remove it in case it is + // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1. + if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) + removeInstrFromLiveness(I); + } + Changed |= Done; + } + } + return Changed; +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) { + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit()) + MI->RemoveOperand(i-1); + } +} + + +void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) { + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + if (HII->isPredicated(MI)) + removeImplicitUses(MI); + } +} + + +void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) { + // Implicit uses that are "undef" are only meaningful (outside of the + // internals of this pass) when the instruction defines a subregister, + // and the implicit-undef use applies to the defined register. In such + // cases, the proper way to record the information in the IR is to mark + // the definition as "undef", which will be interpreted as "read-undef". + typedef SmallSet<unsigned,2> RegisterSet; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + RegisterSet Undefs; + for (unsigned i = MI->getNumOperands(); i > 0; --i) { + MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.isUndef()) { + MI->RemoveOperand(i-1); + Undefs.insert(MO.getReg()); + } + } + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isDef() || !Op.getSubReg()) + continue; + if (Undefs.count(Op.getReg())) + Op.setIsUndef(true); + } + } +} + + +bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + BW = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + BW = (RR.Sub != 0) ? 32 : 64; + return true; + } + return false; +} + + +bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) { + for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { + LiveRange::Segment &LR = *I; + // Range must start at a register... + if (!LR.start.isRegister()) + return false; + // ...and end in a register or in a dead slot. + if (!LR.end.isRegister() && !LR.end.isDead()) + return false; + } + return true; +} + + +bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { + if (CoaLimitActive) { + if (CoaCounter >= CoaLimit) + return false; + CoaCounter++; + } + unsigned BW1, BW2; + if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2) + return false; + if (MRI->isLiveIn(R1.Reg)) + return false; + if (MRI->isLiveIn(R2.Reg)) + return false; + + LiveInterval &L1 = LIS->getInterval(R1.Reg); + LiveInterval &L2 = LIS->getInterval(R2.Reg); + bool Overlap = L1.overlaps(L2); + + DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? 
"overlap" : "disjoint") << ")\n " + << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + if (R1.Sub || R2.Sub) + return false; + if (Overlap) + return false; + + // Coalescing could have a negative impact on scheduling, so try to limit + // to some reasonable extent. Only consider coalescing segments, when one + // of them does not cross basic block boundaries. + if (!isIntraBlocks(L1) && !isIntraBlocks(L2)) + return false; + + MRI->replaceRegWith(R2.Reg, R1.Reg); + + // Move all live segments from L2 to L1. + typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap; + ValueInfoMap VM; + for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) { + VNInfo *NewVN, *OldVN = I->valno; + ValueInfoMap::iterator F = VM.find(OldVN); + if (F == VM.end()) { + NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator()); + VM.insert(std::make_pair(OldVN, NewVN)); + } else { + NewVN = F->second; + } + L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN)); + } + while (L2.begin() != L2.end()) + L2.removeSegment(*L2.begin()); + + updateKillFlags(R1.Reg, L1); + DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + L1.verify(); + + return true; +} + + +/// Attempt to coalesce one of the source registers to a MUX intruction with +/// the destination register. This could lead to having only one predicated +/// instruction in the end instead of two. +bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) { + SmallVector<MachineInstr*,16> Condsets; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &B = *I; + for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) { + MachineInstr *MI = &*J; + if (!isCondset(MI)) + continue; + MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3); + if (!S1.isReg() && !S2.isReg()) + continue; + Condsets.push_back(MI); + } + } + + bool Changed = false; + for (unsigned i = 0, n = Condsets.size(); i < n; ++i) { + MachineInstr *CI = Condsets[i]; + RegisterRef RD = CI->getOperand(0); + RegisterRef RP = CI->getOperand(1); + MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3); + bool Done = false; + // Consider this case: + // vreg1 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = C2_mux ..., vreg1, vreg2 + // If vreg0 was coalesced with vreg1, we could end up with the following + // code: + // vreg0 = instr1 ... + // vreg2 = instr2 ... + // vreg0 = A2_tfrf ..., vreg2 + // which will later become: + // vreg0 = instr1 ... + // vreg0 = instr2_cNotPt ... + // i.e. there will be an unconditional definition (instr1) of vreg0 + // followed by a conditional one. The output dependency was there before + // and it unavoidable, but if instr1 is predicable, we will no longer be + // able to predicate it here. + // To avoid this scenario, don't coalesce the destination register with + // a source register that is defined by a predicable instruction. 
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+ HII = static_cast<const HexagonInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ TRI = MF.getSubtarget().getRegisterInfo();
+ LIS = &getAnalysis<LiveIntervals>();
+ MRI = &MF.getRegInfo();
+
+ bool Changed = false;
+
+ // Try to coalesce the target of a mux with one of its sources.
+ // This could eliminate a register copy in some circumstances.
+ Changed |= coalesceSegments(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ // First, simply split all muxes into a pair of conditional transfers
+ // and update the live intervals to reflect the new arrangement.
+ // This is done mainly to make the live interval update simpler than it
+ // would be while trying to predicate instructions at the same time.
+ Changed |= splitInBlock(*I);
+ // Traverse all blocks and collapse predicable instructions feeding
+ // conditional transfers into predicated instructions.
+ // Walk over all the instructions again, so we may catch pre-existing
+ // cases that were not created in the previous step.
+ Changed |= predicateInBlock(*I);
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ postprocessUndefImplicitUses(*I);
+ return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "Hexagon Expand Condsets";
+ PassInfo *PI = new PassInfo(Name, "expand-condsets",
+ &HexagonExpandCondsets::ID, 0, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index a2209ab..63900e0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -316,6 +316,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
 /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
 MachinePointerInfo(), MachinePointerInfo());
 }

@@ -1716,6 +1717,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
 setOperationAction(ISD::SUBC, MVT::i32, Expand);
 setOperationAction(ISD::SUBC, MVT::i64, Expand);

+ // The only add and sub operations that detect overflow are the saturating
+ // ones.
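In other words, Hexagon's only flag-producing add/sub forms are the saturating ones, so the generic overflow nodes are expanded below and SelectionDAG re-derives the overflow bit from plain arithmetic. Roughly the semantics the expansion must preserve, as an illustrative sketch (not the actual legalizer output):

  #include <cstdint>

  // Reference semantics for an expanded unsigned add-with-overflow (UADDO).
  static inline bool uaddo32(uint32_t A, uint32_t B, uint32_t &Sum) {
    Sum = A + B;    // wraps modulo 2^32
    return Sum < A; // a wrapped sum is smaller than either input
  }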
+ for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + } + setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); @@ -2106,7 +2115,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // is Big Endian. unsigned OpIdx = NElts - i - 1; SDValue Operand = BVN->getOperand(OpIdx); - if (dyn_cast<ConstantSDNode>(Operand)) + if (isa<ConstantSDNode>(Operand)) // This operand is already in ConstVal. continue; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index fbf1ca9..ff4bcad 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -845,8 +845,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask); } -int HexagonInstrInfo:: -getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : Hexagon::PredSense_true; @@ -884,7 +883,7 @@ PredicateInstruction(MachineInstr *MI, // This will change MI's opcode to its predicate version. // However, its operand list is still the old one, i.e. the // non-predicate one. - MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); + MI->setDesc(get(getCondOpcode(Opc, invertJump))); int oper = -1; unsigned int GAIdx = 0; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 2644248..284dde1 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -216,9 +216,7 @@ public: short getNonExtOpcode(const MachineInstr *MI) const; bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; bool PredOpcodeHasNot(Opcode_t Opcode) const; - -private: - int getMatchingCondBranchOpcode(int Opc, bool sense) const; + int getCondOpcode(int Opc, bool sense) const; }; diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 1717ae3..d61cc54 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -72,7 +72,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, const TargetMachine &TM) - : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()), + : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() { diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 48b0bc8..0679866 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -27,11 +27,15 @@ using namespace llvm; static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops", - cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); + cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon CFG Optimization")); + cl::Hidden, 
cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + +static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", + cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Early expansion of MUX")); /// HexagonTargetMachineModule - Note that this is used on hosts that @@ -55,6 +59,10 @@ static MachineSchedRegistry SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); +namespace llvm { + FunctionPass *createHexagonExpandCondsets(); +} + /// HexagonTargetMachine ctor - Create an ILP32 architecture model. /// @@ -79,7 +87,15 @@ namespace { class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None); + if (!NoOpt) { + if (EnableExpandCondsets) { + Pass *Exp = createHexagonExpandCondsets(); + insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp)); + } + } + } HexagonTargetMachine &getHexagonTargetMachine() const { return getTM<HexagonTargetMachine>(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index bdccf88..155aa9e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -57,7 +57,7 @@ public: ELFHexagonAsmBackend(Target const &T, uint8_t OSABI) : HexagonAsmBackend(T), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { StringRef CPU("HexagonV4"); return createHexagonELFObjectWriter(OS, OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 4a3ac8c..fde935b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -27,8 +27,8 @@ private: public: HexagonELFObjectWriter(uint8_t OSABI, StringRef C); - virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, - bool IsPCRel) const override; + unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup, + bool IsPCRel) const override; }; } @@ -55,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/, return Type; } -MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU) { MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); -}
\ No newline at end of file +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 6c87c9f..ec55234 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -78,7 +78,8 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { } void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. @@ -94,7 +95,7 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O, Nop.setOpcode (Hexagon::A2_nop); HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI)); - printInst (&Nop, O, NoAnnot); + printInst (&Nop, O, NoAnnot, STI); } // Close the packet. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index d02243b..98fb99b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -25,7 +25,8 @@ namespace llvm { MCRegisterInfo const &MRI) : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) override; + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); StringRef getRegName(unsigned RegNo) const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index c63bf32..2e10d81 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -75,13 +75,16 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, X->InitMCCodeGenInfo(Reloc::Static, CM, OL); return X; } -static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, + +static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new HexagonInstPrinter(MAI, MII, MRI); + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return(new HexagonInstPrinter(MAI, MII, MRI)); + else + return nullptr; } // Force static initialization. 
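The MC-layer churn in this patch follows one pattern across targets: object-writer factories now take a raw_pwrite_stream instead of a raw_ostream, and MCInstPrinter factories key on the Triple, drop their MCSubtargetInfo parameter, and return null for unsupported syntax variants. The post-patch shape, sketched for a hypothetical target Foo (all Foo names invented for illustration):

  static MCInstPrinter *createFooMCInstPrinter(const Triple &T,
                                               unsigned SyntaxVariant,
                                               const MCAsmInfo &MAI,
                                               const MCInstrInfo &MII,
                                               const MCRegisterInfo &MRI) {
    if (SyntaxVariant == 0)
      return new FooInstPrinter(MAI, MII, MRI);
    return nullptr; // Unsupported syntax variant.
  }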
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 17072d9..de63fd2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -27,6 +27,7 @@ class MCSubtargetInfo; class Target; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheHexagonTarget; @@ -40,8 +41,8 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T, MCRegisterInfo const &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - StringRef CPU); +MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, StringRef CPU); } // End llvm namespace diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index acf1214..6c43d97 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "MSP430GenAsmWriter.inc" void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 7fae505..70141a9 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -25,7 +25,8 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 4c70803..775c0b2 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -58,12 +58,11 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, +static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) return new MSP430InstPrinter(MAI, MII, MRI); return nullptr; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 18141a6..08f41a8 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -645,6 +645,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } else { diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 68868b6..9266c3b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -102,12 +102,6 @@ namespace llvm { const std::string &Constraint, MVT VT) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { - // FIXME: Map different constraints differently. - return InlineAsm::Constraint_m; - } - /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index 05352a2..c63a57c 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -62,7 +62,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } MCSymbol *MSP430MCInstLower:: @@ -79,7 +79,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { } // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); + return Ctx.GetOrCreateSymbol(Name); } MCSymbol *MSP430MCInstLower:: diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 6401bc1..6f7e3c1 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -220,6 +220,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseDirectiveNaN(); bool parseDirectiveSet(); bool parseDirectiveOption(); + bool parseInsnDirective(); bool parseSetAtDirective(); bool parseSetNoAtDirective(); @@ -272,7 +273,10 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getGPR(int RegNo); - int getATReg(SMLoc Loc); + /// Returns the internal register number for the current AT. Also checks if + /// the current AT is unavailable (set to $0) and gives an error if it is. + /// This should be used in pseudo-instruction expansions which need AT. 
+ unsigned getATReg(SMLoc Loc);

 bool processInstruction(MCInst &Inst, SMLoc IDLoc,
 SmallVectorImpl<MCInst> &Instructions);
@@ -1713,7 +1717,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 // FIXME: gas has a special case for values that are 000...1111, which
 // becomes a li -1 and then a dsrl
 if (0 <= ImmValue && ImmValue <= 65535) {
- // For 0 <= j <= 65535.
+ // For unsigned and positive signed 16-bit values (0 <= j <= 65535):
 // li d,j => ori d,$zero,j
 tmpInst.setOpcode(Mips::ORi);
 tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1721,7 +1725,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
 Instructions.push_back(tmpInst);
 } else if (ImmValue < 0 && ImmValue >= -32768) {
- // For -32768 <= j < 0.
+ // For negative signed 16-bit values (-32768 <= j < 0):
 // li d,j => addiu d,$zero,j
 tmpInst.setOpcode(Mips::ADDiu);
 tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1729,8 +1733,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
 Instructions.push_back(tmpInst);
 } else if ((ImmValue & 0xffffffff) == ImmValue) {
- // For any value of j that is representable as a 32-bit integer, create
- // a sequence of:
+ // For all other values which are representable as a 32-bit integer:
 // li d,j => lui d,hi16(j)
 // ori d,d,lo16(j)
 tmpInst.setOpcode(Mips::LUi);
@@ -1752,8 +1755,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 // | 16 bits  | 16 bits  | 16 bits  |
 // |__________|__________|__________|
 //
- // For any value of j that is representable as a 48-bit integer, create
- // a sequence of:
+ // For any 64-bit value that is representable as a 48-bit integer:
 // li d,j => lui d,hi16(j)
 // ori d,d,hi16(lo32(j))
 // dsll d,d,16
@@ -1778,7 +1780,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 // | 16 bits  | 16 bits  | 16 bits  | 16 bits  |
 // |__________|__________|__________|__________|
 //
- // For any value of j that isn't representable as a 48-bit integer.
+ // For all other values which are representable as a 64-bit integer:
 // li d,j => lui d,hi16(j)
 // ori d,d,lo16(hi32(j))
 // dsll d,d,16
@@ -2048,13 +2050,11 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
 if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
 TmpRegNum = RegOpNum;
 else {
- int AT = getATReg(IDLoc);
 // At this point we need AT to perform the expansions and we exit if it is
 // not available.
- if (!AT)
+ TmpRegNum = getATReg(IDLoc);
+ if (!TmpRegNum)
 return;
- TmpRegNum = getReg(
- (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
 }

 TempInst.setOpcode(Mips::LUi);
@@ -2078,12 +2078,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
 // Prepare TempInst for next instruction.
 TempInst.clear();
 // Add temp register to base.
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
+ if (BaseRegNum != Mips::ZERO) {
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ }
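For orientation, with an offset too large for 16 bits the expansion built here is (illustrative registers; AT is $1, and the low half is below 0x8000 so no carry adjustment is needed):

  lw $8, 0x12340($9)  =>  lui  $1, 0x1
                          addu $1, $1, $9
                          lw   $8, 0x2340($1)

With the guard just added, a $zero base skips the addu, leaving only the lui and the final load or store.

 // And finally, create original instruction with low part
 // of offset and new base.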
TempInst.setOpcode(Inst.getOpcode()); @@ -2383,11 +2385,15 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) { return CC; } -int MipsAsmParser::getATReg(SMLoc Loc) { - int AT = AssemblerOptions.back()->getATRegNum(); - if (AT == 0) +unsigned MipsAsmParser::getATReg(SMLoc Loc) { + unsigned ATIndex = AssemblerOptions.back()->getATRegNum(); + if (ATIndex == 0) { reportParseError(Loc, "pseudo-instruction requires $at, which is not available"); + return 0; + } + unsigned AT = getReg( + (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, ATIndex); return AT; } @@ -2571,7 +2577,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { if (Tok.isNot(AsmToken::Identifier)) return true; - std::string Str = Tok.getIdentifier().str(); + std::string Str = Tok.getIdentifier(); Parser.Lex(); // Eat the identifier. // Now make an expression from the rest of the operand. @@ -3579,11 +3585,7 @@ bool MipsAsmParser::parseSetAssignment() { if (Parser.parseExpression(Value)) return reportParseError("expected valid expression after comma"); - // Check if the Name already exists as a symbol. - MCSymbol *Sym = getContext().LookupSymbol(Name); - if (Sym) - return reportParseError("symbol already defined"); - Sym = getContext().GetOrCreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); Sym->setVariableValue(Value); return false; @@ -4044,6 +4046,23 @@ bool MipsAsmParser::parseDirectiveOption() { return false; } +/// parseInsnDirective +/// ::= .insn +bool MipsAsmParser::parseInsnDirective() { + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + // The actual label marking happens in + // MipsELFStreamer::createPendingLabelRelocs(). + getTargetStreamer().emitDirectiveInsn(); + + getParser().Lex(); // Eat EndOfStatement token. 
+ return false; +} + /// parseDirectiveModule /// ::= .module oddspreg /// ::= .module nooddspreg @@ -4437,6 +4456,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".llvm_internal_mips_reallow_module_directive") return parseInternalDirectiveReallowModule(); + if (IDVal == ".insn") + return parseInsnDirective(); + return true; } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index aad549d..e80a47b 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -77,7 +77,7 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { switch (MI->getOpcode()) { default: break; diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 468dc07..713f35c 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -86,7 +86,8 @@ public: static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; bool printAliasInstr(const MCInst *MI, raw_ostream &OS); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index acf6f21..dbcd867 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -155,7 +155,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } -MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const { +MCObjectWriter * +MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createMipsELFObjectWriter(OS, MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 243b73d..b3d5a49 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -36,7 +36,7 @@ public: bool Is64Bit) : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index a68bf16..8d9e3e3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,17 +23,33 @@ using namespace llvm; namespace { +// A helper structure based on ELFRelocationEntry, used for sorting entries in +// the relocation table. 
+struct MipsRelocationEntry { + MipsRelocationEntry(const ELFRelocationEntry &R) + : R(R), SortOffset(R.Offset), HasMatchingHi(false) {} + const ELFRelocationEntry R; + // SortOffset equals R.Offset except for the *HI16 relocations, for which it + // will be set based on the R.Offset of the matching *LO16 relocation. + int64_t SortOffset; + // True when this is a *LO16 relocation chosen as a match for a *HI16 + // relocation. + bool HasMatchingHi; +}; + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64, bool IsLittleEndian); - virtual ~MipsELFObjectWriter(); + ~MipsELFObjectWriter() override; unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; bool needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const override; + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) override; }; } @@ -225,6 +242,169 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +// Sort entries by SortOffset in descending order. +// When there are more *HI16 relocs paired with one *LO16 reloc, the 2nd rule +// sorts them in ascending order of R.Offset. +static int cmpRelMips(const MipsRelocationEntry *AP, + const MipsRelocationEntry *BP) { + const MipsRelocationEntry &A = *AP; + const MipsRelocationEntry &B = *BP; + if (A.SortOffset != B.SortOffset) + return B.SortOffset - A.SortOffset; + if (A.R.Offset != B.R.Offset) + return A.R.Offset - B.R.Offset; + if (B.R.Type != A.R.Type) + return B.R.Type - A.R.Type; + //llvm_unreachable("ELFRelocs might be unstable!"); + return 0; +} + +// For the given Reloc.Type, return the matching relocation type, as in the +// table below. +static unsigned getMatchingLoType(const MCAssembler &Asm, + const ELFRelocationEntry &Reloc) { + unsigned Type = Reloc.Type; + if (Type == ELF::R_MIPS_HI16) + return ELF::R_MIPS_LO16; + if (Type == ELF::R_MICROMIPS_HI16) + return ELF::R_MICROMIPS_LO16; + if (Type == ELF::R_MIPS16_HI16) + return ELF::R_MIPS16_LO16; + + const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol); + + if (MCELF::GetBinding(SD) != ELF::STB_LOCAL) + return ELF::R_MIPS_NONE; + + if (Type == ELF::R_MIPS_GOT16) + return ELF::R_MIPS_LO16; + if (Type == ELF::R_MICROMIPS_GOT16) + return ELF::R_MICROMIPS_LO16; + if (Type == ELF::R_MIPS16_GOT16) + return ELF::R_MIPS16_LO16; + + return ELF::R_MIPS_NONE; +} + +// Return true if First needs a matching *LO16, its matching *LO16 type equals +// Second's type and both relocations are against the same symbol. +static bool areMatchingHiAndLo(const MCAssembler &Asm, + const ELFRelocationEntry &First, + const ELFRelocationEntry &Second) { + return getMatchingLoType(Asm, First) != ELF::R_MIPS_NONE && + getMatchingLoType(Asm, First) == Second.Type && + First.Symbol && First.Symbol == Second.Symbol; +} + +// Return true if MipsRelocs[Index] is a *LO16 preceded by a matching *HI16. +static bool +isPrecededByMatchingHi(const MCAssembler &Asm, uint32_t Index, + std::vector<MipsRelocationEntry> &MipsRelocs) { + return Index < MipsRelocs.size() - 1 && + areMatchingHiAndLo(Asm, MipsRelocs[Index + 1].R, MipsRelocs[Index].R); +} + +// Return true if MipsRelocs[Index] is a *LO16 not preceded by a matching *HI16 +// and not chosen by a *HI16 as a match. 
+static bool isFreeLo(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() && !MipsRelocs[Index].HasMatchingHi &&
+ !isPrecededByMatchingHi(Asm, Index, MipsRelocs);
+}
+
+// Lo is chosen as a match for Hi; set their fields accordingly.
+// Mips instructions have a fixed length of at least two bytes (two for
+// micromips/mips16, four for mips32/64), so we can set HI's SortOffset to
+// matching LO's Offset minus one to simplify the sorting function.
+static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
+ Lo.HasMatchingHi = true;
+ Hi.SortOffset = Lo.R.Offset - 1;
+}
+
+// We sort relocation table entries by offset, except for one additional rule
+// required by the MIPS ABI: every *HI16 relocation must be immediately
+// followed by the corresponding *LO16 relocation. We also support a GNU
+// extension that allows more *HI16s paired with one *LO16.
+//
+// *HI16 relocations and their matching *LO16 are:
+//
+// +---------------------------------------------+-------------------+
+// | *HI16 | matching *LO16 |
+// |---------------------------------------------+-------------------|
+// | R_MIPS_HI16, local R_MIPS_GOT16 | R_MIPS_LO16 |
+// | R_MICROMIPS_HI16, local R_MICROMIPS_GOT16 | R_MICROMIPS_LO16 |
+// | R_MIPS16_HI16, local R_MIPS16_GOT16 | R_MIPS16_LO16 |
+// +---------------------------------------------+-------------------+
+//
+// (local R_*_GOT16 meaning R_*_GOT16 against the local symbol.)
+//
+// To handle *HI16 and *LO16 relocations, the linker needs a combined addend
+// ("AHL") calculated from both *HI16 ("AHI") and *LO16 ("ALO") relocations:
+// AHL = (AHI << 16) + (short)ALO;
+//
+// We reuse the gnu as sorting algorithm, so we emit the relocation table
+// sorted the same way gnu as would sort it, for easier comparison of the
+// generated .o files.
+//
+// The logic is:
+// search the table (starting from the highest offset and going back to zero)
+// for all *HI16 relocations that don't have a matching *LO16.
+// For every such HI, find a matching LO with highest offset that isn't already
+// matched with another HI. If there are no free LOs, match it with the first
+// found (starting from lowest offset).
+// When there are more HIs matched with one LO, sort them in descending order by
+// offset.
+//
+// In other words, when searching for a matching LO:
+// - don't look for a 'better' match for the HIs that are already followed by a
+// matching LO;
+// - prefer LOs without a pair;
+// - prefer LOs with higher offset;
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ if (Relocs.size() < 2)
+ return;
+
+ // The default function sorts entries by Offset in descending order.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ // Init MipsRelocs from Relocs.
+ std::vector<MipsRelocationEntry> MipsRelocs;
+ for (unsigned I = 0, E = Relocs.size(); I != E; ++I)
+ MipsRelocs.push_back(MipsRelocationEntry(Relocs[I]));
+
+ // Find a matching LO for all HIs that need it.
+ for (int32_t I = 0, E = MipsRelocs.size(); I != E; ++I) {
+ if (getMatchingLoType(Asm, MipsRelocs[I].R) == ELF::R_MIPS_NONE ||
+ (I > 0 && isPrecededByMatchingHi(Asm, I - 1, MipsRelocs)))
+ continue;
+
+ int32_t MatchedLoIndex = -1;
+
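A worked instance of the pairing rules, with assumed offsets: HI16 at 0x0, HI16 at 0x8, and LO16 at 0xC, all against the same local symbol. Both HIs match the LO at 0xC (it is free and has the highest offset), so after setMatch():

  HI16 @ 0x0 -> SortOffset 0xB
  HI16 @ 0x8 -> SortOffset 0xB
  LO16 @ 0xC -> SortOffset 0xC

After the descending sort below, the LO sits immediately next to both HIs, which tie-break among themselves by their real offsets.

+ // Search the list in the ascending order of Offset.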
+ for (int32_t J = MipsRelocs.size() - 1, N = -1; J != N; --J) { + // check for a match + if (areMatchingHiAndLo(Asm, MipsRelocs[I].R, MipsRelocs[J].R) && + (MatchedLoIndex == -1 || // first match + // or we already have a match, + // but this one is with higher offset and it's free + (MatchedLoIndex > J && isFreeLo(Asm, J, MipsRelocs)))) + MatchedLoIndex = J; + } + + if (MatchedLoIndex != -1) + // We have a match. + setMatch(MipsRelocs[I], MipsRelocs[MatchedLoIndex]); + } + + // SortOffsets are calculated, call the sorting function. + array_pod_sort(MipsRelocs.begin(), MipsRelocs.end(), cmpRelMips); + + // Copy sorted MipsRelocs back to Relocs. + for (unsigned I = 0, E = MipsRelocs.size(); I != E; ++I) + Relocs[I] = MipsRelocs[I].R; +} + bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { @@ -264,7 +444,8 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 93f60df..6d1d9f4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -21,8 +21,6 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, MCContext &Context = getContext(); const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo(); - MipsTargetELFStreamer *ELFTargetStreamer = - static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) { const MCOperand &Op = Inst.getOperand(OpIndex); @@ -34,6 +32,14 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst, RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo); } + createPendingLabelRelocs(); +} + +void MipsELFStreamer::createPendingLabelRelocs() { + MipsTargetELFStreamer *ELFTargetStreamer = + static_cast<MipsTargetELFStreamer *>(getTargetStreamer()); + + // FIXME: Also mark labels when in MIPS16 mode. if (ELFTargetStreamer->isMicroMipsEnabled()) { for (auto Label : Labels) { MCSymbolData &Data = getOrCreateSymbolData(Label); @@ -70,7 +76,8 @@ void MipsELFStreamer::EmitMipsOptionRecords() { } MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { return new MipsELFStreamer(Context, MAB, OS, Emitter); diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 6b834c6..4e30901 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -33,7 +33,7 @@ class MipsELFStreamer : public MCELFStreamer { public: - MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, + MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MCELFStreamer(Context, MAB, OS, Emitter) { @@ -65,10 +65,13 @@ public: /// Emits all the option records stored up until the point it's called. void EmitMipsOptionRecords(); + + /// Mark labels as microMIPS, if necessary for the subtarget. 
+ void createPendingLabelRelocs(); }; MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } // namespace llvm. #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index b01726d..cc40e2e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -43,7 +43,7 @@ public: MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) : MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {} - ~MipsMCCodeEmitter() {} + ~MipsMCCodeEmitter() override {} void EmitByte(unsigned char C, raw_ostream &OS) const; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index e6b5be7..687b800 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -23,9 +23,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll); + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); } #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 6f3f37b..a75d27d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -97,17 +97,16 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createMipsMCInstPrinter(const Target &T, +static MCInstPrinter *createMipsMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new MipsInstPrinter(MAI, MII, MRI); } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, - MCAsmBackend &MAB, raw_ostream &OS, + MCAsmBackend &MAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { MCStreamer *S; if (!T.isOSNaCl()) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 92f394a..577a8b3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -27,6 +27,7 @@ class MCSubtargetInfo; class StringRef; class Target; class raw_ostream; +class raw_pwrite_stream; extern Target TheMipsTarget; extern Target TheMipselTarget; @@ -53,7 +54,7 @@ MCAsmBackend *createMipsAsmBackendEL64(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, +MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool IsLittleEndian, bool Is64Bit); namespace MIPS_MC { diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 1adfdf9..35348aa 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -36,11 +36,11 @@ const unsigned LoadStoreStackMaskReg = Mips::T7; class MipsNaClELFStreamer : public MipsELFStreamer { public: - MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter 
*Emitter) + MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter) : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {} - ~MipsNaClELFStreamer() {} + ~MipsNaClELFStreamer() override {} private: // Whether we started the sandboxing sequence for calls. Calls are bundled @@ -252,7 +252,7 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { } MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, + raw_pwrite_stream &OS, MCCodeEmitter *Emitter, bool RelaxAll) { MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 5790a5c..cfd56c6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -54,6 +54,7 @@ void MipsTargetStreamer::emitDirectiveNaN2008() {} void MipsTargetStreamer::emitDirectiveNaNLegacy() {} void MipsTargetStreamer::emitDirectiveOptionPic0() {} void MipsTargetStreamer::emitDirectiveOptionPic2() {} +void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); } void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) {} void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {} @@ -189,6 +190,11 @@ void MipsTargetAsmStreamer::emitDirectiveOptionPic2() { OS << "\t.option\tpic2\n"; } +void MipsTargetAsmStreamer::emitDirectiveInsn() { + MipsTargetStreamer::emitDirectiveInsn(); + OS << "\t.insn\n"; +} + void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) { OS << "\t.frame\t$" @@ -507,9 +513,8 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol, const MCSymbol &RhsSym = static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); - uint8_t Type = MCELF::GetType(Data); - if ((Type != ELF::STT_FUNC) || - !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) + + if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) return; MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); @@ -637,6 +642,12 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() { MCA.setELFHeaderEFlags(Flags); } +void MipsTargetELFStreamer::emitDirectiveInsn() { + MipsTargetStreamer::emitDirectiveInsn(); + MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer); + MEF.createPendingLabelRelocs(); +} + void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg_) { MCContext &Context = getStreamer().getAssembler().getContext(); diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index abecfa0..5828fbd 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -143,25 +143,6 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions -void Mips16FrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - const Mips16InstrInfo &TII = - *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo()); - - TII.adjustStackPtr(Mips::SP, Amount, 
MBB, I); - } - - MBB.erase(I); -} - bool Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 012d558..0287e59 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -26,10 +26,6 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 00d4495..a49572e 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted( void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + if (Amount == 0) + return; + if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16> BuildAddiuSpImm(MBB, I, Amount); else diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index f9b7387..6540b40 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -77,7 +77,7 @@ public: /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I) const override; /// Emit a series of instructions to load an immediate. // This is to adjust some FrameReg. We return the new register to be used diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 1eb3b2c..9024f21 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -260,31 +260,22 @@ void MipsAsmPrinter::printSavedRegsBitmask() { unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize(); bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; - unsigned i, e = CSI.size(); - - // Set FPU Bitmask. - for (i = 0; i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Mips::GPR32RegClass.contains(Reg)) - break; + for (const auto &I : CSI) { + unsigned Reg = I.getReg(); unsigned RegNum = TRI->getEncodingValue(Reg); - if (Mips::AFGR64RegClass.contains(Reg)) { + + // If it's a floating point register, set the FPU Bitmask. + // If it's a general purpose register, set the CPU Bitmask. + if (Mips::FGR32RegClass.contains(Reg)) { + FPUBitmask |= (1 << RegNum); + CSFPRegsSize += FGR32RegSize; + } else if (Mips::AFGR64RegClass.contains(Reg)) { FPUBitmask |= (3 << RegNum); CSFPRegsSize += AFGR64RegSize; HasAFGR64Reg = true; - continue; - } - - FPUBitmask |= (1 << RegNum); - CSFPRegsSize += FGR32RegSize; - } - - // Set CPU Bitmask. - for (; i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - unsigned RegNum = TRI->getEncodingValue(Reg); - CPUBitmask |= (1 << RegNum); + } else if (Mips::GPR32RegClass.contains(Reg)) + CPUBitmask |= (1 << RegNum); } // FP Regs are saved right below where the virtual frame pointer points to. 
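The rewritten loop above folds the old two-phase scan (FPU registers, then CPU registers) into a single pass over CSI. A worked instance, assuming the usual MIPS encoding values ($s0 = 16, $s1 = 17, and the AFGR64 pair $d10 = 20, covering $f20/$f21):

  unsigned CPUBitmask = (1 << 16) | (1 << 17); // $s0, $s1         -> 0x00030000
  unsigned FPUBitmask = (3 << 20);             // $d10 ($f20,$f21) -> 0x00300000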
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 7de0081..e8e3d3d 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -440,7 +440,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) { bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { const GlobalValue *GV = dyn_cast<GlobalValue>(V); - if (GV && isa<Function>(GV) && dyn_cast<Function>(GV)->isIntrinsic()) + if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic()) return false; if (!GV) return false; diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 8b8b019..826fbaf 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -131,3 +131,20 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { return RoundUpToAlignment(Offset, getStackAlignment()); } + +// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions +void MipsFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned SP = STI.getABI().IsN64() ? Mips::SP_64 : Mips::SP; + + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + STI.getInstrInfo()->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 90a8d2a..96d1e29 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -32,6 +32,11 @@ public: bool hasFP(const MachineFunction &MF) const override; + void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + protected: uint64_t estimateStackSize(const MachineFunction &MF) const; }; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e4bae03..f37737d 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3682,6 +3682,7 @@ void MipsTargetLowering::passByValArg( DAG.getIntPtrConstant(VA.getLocMemOffset())); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy), Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7b2b289..4589535 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -117,6 +117,10 @@ public: const TargetRegisterInfo *TRI, int64_t Offset) const = 0; + virtual void adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const = 0; + /// Create an instruction which has the same operands and memory operands /// as MI but has a new opcode. 
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index c937d2b..a1fad66 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -186,10 +186,8 @@ def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; -def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, - AssemblerPredicate<"FeatureMips32">; -def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, - AssemblerPredicate<"FeatureMips32">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; @@ -1596,8 +1594,12 @@ def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; } def : MipsInstAlias<"bnez $rs,$offset", (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"bnezl $rs,$offset", + (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"beqz $rs,$offset", (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"beqzl $rs,$offset", + (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; def : MipsInstAlias<"break", (BREAK 0, 0), 1>; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 09e722d..0d1ee04 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -60,15 +60,7 @@ void MipsCallEntry::printCustom(raw_ostream &O) const { #endif } -MipsFunctionInfo::~MipsFunctionInfo() { - for (StringMap<const MipsCallEntry *>::iterator - I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E; - ++I) - delete I->getValue(); - - for (const auto &Entry : GlobalCallEntries) - delete Entry.second; -} +MipsFunctionInfo::~MipsFunctionInfo() {} bool MipsFunctionInfo::globalBaseRegSet() const { return GlobalBaseReg; @@ -125,21 +117,21 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const { } MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) { - const MipsCallEntry *&E = ExternalCallEntries[Name]; + std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name]; if (!E) - E = new MipsCallEntry(Name); + E = llvm::make_unique<MipsCallEntry>(Name); - return MachinePointerInfo(E); + return MachinePointerInfo(E.get()); } MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) { - const MipsCallEntry *&E = GlobalCallEntries[Val]; + std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val]; if (!E) - E = new MipsCallEntry(Val); + E = llvm::make_unique<MipsCallEntry>(Val); - return MachinePointerInfo(E); + return MachinePointerInfo(E.get()); } int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 217f307..32436ef 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -144,8 +144,9 @@ private: int MoveF64ViaSpillFI; /// MipsCallEntry maps. 
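/// (Editorial note, not part of the patch: holding the entries by
/// std::unique_ptr below is what lets the hand-written delete loops in
/// ~MipsFunctionInfo above collapse to an empty body; the maps now own
/// and free their MipsCallEntry values automatically.)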
- StringMap<const MipsCallEntry *> ExternalCallEntries; - ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries; + StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries; + ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>> + GlobalCallEntries; }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h index dc29cbd..746feab 100644 --- a/lib/Target/Mips/MipsOptionRecord.h +++ b/lib/Target/Mips/MipsOptionRecord.h @@ -52,7 +52,7 @@ public: COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID)); COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID)); } - ~MipsRegInfoRecord() {} + ~MipsRegInfoRecord() override {} void EmitMipsOptionRecord() override; void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo); diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 7c79c4c..23feb5c 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -607,26 +607,6 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { !MFI->hasVarSizedObjects(); } -// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions -void MipsSEFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const MipsSEInstrInfo &TII = - *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo()); - - if (!hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - - if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) - Amount = -Amount; - - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - TII.adjustStackPtr(SP, Amount, MBB, I); - } - - MBB.erase(I); -} - void MipsSEFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 0eca1df..22448a4 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -27,10 +27,6 @@ public: void emitPrologue(MachineFunction &MF) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index a598c3f..6daa632 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -948,7 +948,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, llvm_unreachable("Unexpected asm memory constraint"); // All memory constraints can at least accept raw pointers. case InlineAsm::Constraint_i: - case InlineAsm::Constraint_R: OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; @@ -961,6 +960,20 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, OutOps.push_back(Op); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); return false; + case InlineAsm::Constraint_R: + // The 'R' constraint is supposed to be much more complicated than this. + // However, it's becoming less useful due to architectural changes and + // ought to be replaced by other constraints such as 'ZC'. 
+ // For now, support 9-bit signed offsets which is supportable by all + // subtargets for all instructions. + if (selectAddrRegImm9(Op, Base, Offset)) { + OutOps.push_back(Base); + OutOps.push_back(Offset); + return false; + } + OutOps.push_back(Op); + OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); + return false; case InlineAsm::Constraint_ZC: // ZC matches whatever the pref, ll, and sc instructions can handle for the // given subtarget. diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index b992579..cb38393 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + if (Amount == 0) + return; + if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index d16fab2..bebbabf 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -68,7 +68,7 @@ public: /// Adjust SP by Amount bytes. void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock::iterator I) const override; /// Emit a series of instructions to load an immediate. If NewImm is a /// non-NULL parameter, the last instruction is not emitted, but instead diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 1ff041d..22b0c6c 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -45,6 +45,7 @@ public: virtual void emitDirectiveNaNLegacy(); virtual void emitDirectiveOptionPic0(); virtual void emitDirectiveOptionPic2(); + virtual void emitDirectiveInsn(); virtual void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg); virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff); @@ -160,6 +161,7 @@ public: void emitDirectiveNaNLegacy() override; void emitDirectiveOptionPic0() override; void emitDirectiveOptionPic2() override; + void emitDirectiveInsn() override; void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) override; void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; @@ -227,6 +229,7 @@ public: void emitDirectiveNaNLegacy() override; void emitDirectiveOptionPic0() override; void emitDirectiveOptionPic2() override; + void emitDirectiveInsn() override; void emitFrame(unsigned StackReg, unsigned StackSize, unsigned ReturnReg) override; void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index 80b2f62..ac92df9 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -28,13 +28,9 @@ using namespace llvm; #include "NVPTXGenAsmWriter.inc" - NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - setAvailableFeatures(STI.getFeatureBits()); -} + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // Decode the virtual 
register @@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, OS); // Next always print the annotation. diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 0496964..02c5a21 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -25,10 +25,11 @@ class MCSubtargetInfo; class NVPTXInstPrinter : public MCInstPrinter { public: NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); + const MCRegisterInfo &MRI); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 2b4d864..f9e4324 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -58,14 +58,13 @@ static MCCodeGenInfo *createNVPTXMCCodeGenInfo( return X; } -static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T, +static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new NVPTXInstPrinter(MAI, MII, MRI, STI); + return new NVPTXInstPrinter(MAI, MII, MRI); return nullptr; } diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 93fabf6..96abfa8 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", "Target SM 2.1">; def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", "Target SM 3.0">; +def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32", + "Target SM 3.2">; def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", "Target SM 3.5">; +def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37", + "Target SM 3.7">; def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50", "Target SM 5.0">; +def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52", + "Target SM 5.2">; +def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53", + "Target SM 5.3">; // PTX Versions -def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", - "Use PTX version 3.0">; -def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", - "Use PTX version 3.1">; def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", "Use PTX version 3.2">; def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40", "Use PTX version 4.0">; +def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41", + "Use PTX version 4.1">; +def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42", + "Use PTX version 4.2">; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
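The feature knobs above only name the new SM and PTX levels; the Proc definitions in the next hunk pair each newly added SM target with a minimum PTX version. A rough C++ illustration of that pairing, with the values read off those definitions (the helper is hypothetical, not part of the patch or of LLVM):

    // Minimum PTX ISA version implied by each newly added SM target, per the
    // Proc definitions in the next hunk. Hypothetical illustration only.
    unsigned minPtxForSm(unsigned SmVersion) {
      switch (SmVersion) {
      case 32: return 40; // sm_32 -> PTX 4.0
      case 37: return 41; // sm_37 -> PTX 4.1
      case 50: return 40; // sm_50 -> PTX 4.0
      case 52: return 41; // sm_52 -> PTX 4.1
      case 53: return 42; // sm_53 -> PTX 4.2
      default: return 32; // older SM targets: assumed PTX 3.2 baseline
      }
    }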
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features> def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_32", [SM32, PTX40]>; def : Proc<"sm_35", [SM35]>; -def : Proc<"sm_50", [SM50]>; +def : Proc<"sm_37", [SM37, PTX41]>; +def : Proc<"sm_50", [SM50, PTX40]>; +def : Proc<"sm_52", [SM52, PTX41]>; +def : Proc<"sm_53", [SM53, PTX42]>; def NVPTXInstrInfo : InstrInfo { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index cc58b07..9a71964 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -118,7 +118,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { DebugLoc curLoc = MI.getDebugLoc(); - if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) + if (!prevDebugLoc && !curLoc) return; if (prevDebugLoc == curLoc) @@ -126,39 +126,32 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { prevDebugLoc = curLoc; - if (curLoc.isUnknown()) + if (!curLoc) return; - const MachineFunction *MF = MI.getParent()->getParent(); - //const TargetMachine &TM = MF->getTarget(); - - const LLVMContext &ctx = MF->getFunction()->getContext(); - DIScope Scope(curLoc.getScope(ctx)); - - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); + auto *Scope = cast_or_null<MDScope>(curLoc.getScope()); if (!Scope) return; - StringRef fileName(Scope.getFilename()); - StringRef dirName(Scope.getDirectory()); + StringRef fileName(Scope->getFilename()); + StringRef dirName(Scope->getDirectory()); SmallString<128> FullPathName = dirName; if (!dirName.empty() && !sys::path::is_absolute(fileName)) { sys::path::append(FullPathName, fileName); - fileName = FullPathName.str(); + fileName = FullPathName; } - if (filenameMap.find(fileName.str()) == filenameMap.end()) + if (filenameMap.find(fileName) == filenameMap.end()) return; // Emit the line from the source file. 
if (InterleaveSrc) - this->emitSrcInText(fileName.str(), curLoc.getLine()); + this->emitSrcInText(fileName, curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() << " " << curLoc.getCol(); - OutStreamer.EmitRawText(Twine(temp.str().c_str())); + OutStreamer.EmitRawText(temp.str()); } void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -641,7 +634,7 @@ static bool usedInGlobalVarDef(const Constant *C) { return false; if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (GV->getName().str() == "llvm.used") + if (GV->getName() == "llvm.used") return false; return true; } @@ -656,7 +649,7 @@ static bool usedInGlobalVarDef(const Constant *C) { static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { - if (othergv->getName().str() == "llvm.used") + if (othergv->getName() == "llvm.used") return true; } @@ -780,32 +773,32 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DbgFinder.processModule(M); unsigned i = 1; - for (DICompileUnit DIUnit : DbgFinder.compile_units()) { - StringRef Filename(DIUnit.getFilename()); - StringRef Dirname(DIUnit.getDirectory()); + for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) { + StringRef Filename = DIUnit->getFilename(); + StringRef Dirname = DIUnit->getDirectory(); SmallString<128> FullPathName = Dirname; if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { sys::path::append(FullPathName, Filename); - Filename = FullPathName.str(); + Filename = FullPathName; } - if (filenameMap.find(Filename.str()) != filenameMap.end()) + if (filenameMap.find(Filename) != filenameMap.end()) continue; - filenameMap[Filename.str()] = i; - OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); + filenameMap[Filename] = i; + OutStreamer.EmitDwarfFileDirective(i, "", Filename); ++i; } - for (DISubprogram SP : DbgFinder.subprograms()) { - StringRef Filename(SP.getFilename()); - StringRef Dirname(SP.getDirectory()); + for (MDSubprogram *SP : DbgFinder.subprograms()) { + StringRef Filename = SP->getFilename(); + StringRef Dirname = SP->getDirectory(); SmallString<128> FullPathName = Dirname; if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { sys::path::append(FullPathName, Filename); - Filename = FullPathName.str(); + Filename = FullPathName; } - if (filenameMap.find(Filename.str()) != filenameMap.end()) + if (filenameMap.find(Filename) != filenameMap.end()) continue; - filenameMap[Filename.str()] = i; + filenameMap[Filename] = i; ++i; } } @@ -1011,7 +1004,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, msg.append("Error: "); msg.append("Symbol "); if (V->hasName()) - msg.append(V->getName().str()); + msg.append(V->getName()); msg.append("has unsupported appending linkage type"); llvm_unreachable(msg.c_str()); } else if (!V->hasInternalLinkage() && @@ -1147,7 +1140,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, const Function *demotedFunc = nullptr; if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { - O << "// " << GVar->getName().str() << " has been demoted\n"; + O << "// " << GVar->getName() << " has been demoted\n"; if (localDecls.find(demotedFunc) != localDecls.end()) localDecls[demotedFunc].push_back(GVar); else { @@ -1195,9 +1188,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // The frontend adds 
zero-initializer to variables that don't have an // initial value, so skip warning for this case. if (!GVar->getInitializer()->isNullValue()) { - std::string warnMsg = "initial value of '" + GVar->getName().str() + - "' is not allowed in addrspace(" + - llvm::utostr_32(PTy->getAddressSpace()) + ")"; + std::string warnMsg = + ("initial value of '" + GVar->getName() + + "' is not allowed in addrspace(" + + Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); report_fatal_error(warnMsg.c_str()); } } @@ -1771,12 +1765,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { - unsigned char c = - (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); } else if (ETy == Type::getInt16Ty(CPV->getContext())) { - short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { @@ -2086,7 +2079,7 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader *reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2094,7 +2087,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { temp << " "; temp << reader->readLine(line); temp << "\n"; - this->OutStreamer.EmitRawText(Twine(temp.str())); + this->OutStreamer.EmitRawText(temp.str()); } LineReader *NVPTXAsmPrinter::getReader(std::string filename) { diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 6d7c99c..ae63cae 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -132,9 +132,8 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( } else { // GEP is a constant expression. 
Constant *NewGEPCE = ConstantExpr::getGetElementPtr( - cast<Constant>(Cast->getOperand(0)), - Indices, - GEP->isInBounds()); + GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)), + Indices, GEP->isInBounds()); GEP->replaceAllUsesWith( ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); } diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 850c020..6fd09c4 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -347,6 +347,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)) : Builder.CreateInBoundsGEP( + cast<GEPOperator>(C)->getSourceElementType(), NewOperands[0], makeArrayRef(&NewOperands[1], NumOperands - 1)); case Instruction::Select: diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index ff74e6e..8b06657 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3893,7 +3893,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, const SDNode *left = N0.getOperand(0).getNode(); const SDNode *right = N0.getOperand(1).getNode(); - if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) + if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right)) opIsLive = true; if (!opIsLive) diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 578401a..6ab0fad 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -70,8 +70,8 @@ static void convertTransferToLoop( // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. 
- unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -84,9 +84,11 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind - Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile); + Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), + srcVolatile); // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile); + loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), + dstVolatile); // The value for ind coming from backedge is (ind + 1) Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); @@ -106,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored dstAddr = @@ -116,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, PHINode *ind = loop.CreatePHI(len->getType(), 0); ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); - loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false); + loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); ind->addIncoming(newind, loopBB); diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index b8df5af..2cd10e8 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -52,7 +52,7 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; // Emission of machine code through MCJIT is not supported. - bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, + bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &, bool = true) override { return true; } diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index b8af04d..dc81802 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXTargetTransformInfo.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +20,75 @@ using namespace llvm; #define DEBUG_TYPE "NVPTXtti" +// Whether the given intrinsic reads threadIdx.x/y/z. 
+static bool readsThreadIndex(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+    default: return false;
+    case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+    case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+    case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+      return true;
+  }
+}
+
+static bool readsLaneId(const IntrinsicInst *II) {
+  return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
+
+// Whether the given intrinsic is an atomic instruction in PTX.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+  switch (II->getIntrinsicID()) {
+    default: return false;
+    case Intrinsic::nvvm_atomic_load_add_f32:
+    case Intrinsic::nvvm_atomic_load_inc_32:
+    case Intrinsic::nvvm_atomic_load_dec_32:
+      return true;
+  }
+}
+
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+  // Without inter-procedural analysis, we conservatively assume that arguments
+  // to __device__ functions are divergent.
+  if (const Argument *Arg = dyn_cast<Argument>(V))
+    return !isKernelFunction(*Arg->getParent());
+
+  if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Without pointer analysis, we conservatively assume values loaded from
+    // generic or local address space are divergent.
+    if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      unsigned AS = LI->getPointerAddressSpace();
+      return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+    }
+    // Atomic instructions may cause divergence. Atomic instructions are
+    // executed sequentially across all threads in a warp. Therefore, an earlier
+    // executed thread may see different memory inputs than a later executed
+    // thread. For example, suppose *a = 0 initially.
+    //
+    //   atom.global.add.s32 d, [a], 1
+    //
+    // returns 0 for the first thread that enters the critical region, and 1 for
+    // the second thread.
+    if (I->isAtomic())
+      return true;
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      // Instructions that read threadIdx are obviously divergent.
+      if (readsThreadIndex(II) || readsLaneId(II))
+        return true;
+      // Handle the NVPTX atomic intrinsics that cannot be represented as an
+      // atomic IR instruction.
+      if (isNVVMAtomic(II))
+        return true;
+    }
+    // Conservatively consider the return value of function calls as divergent.
+    // We could analyze callees with bodies more precisely using
+    // inter-procedural analysis.
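    // Editorial note, not part of the patch: even a call whose arguments are
    // all warp-uniform can return a divergent value (e.g. a callee that reads
    // threadIdx internally), so without inter-procedural analysis the only
    // safe answer for a call result is "divergent", which is what the check
    // below gives.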
+ if (isa<CallInst>(I)) + return true; + } + + return false; +} + unsigned NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index bf21e88..4280888 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -61,6 +61,8 @@ public: bool hasBranchDivergence() { return true; } + bool isSourceOfDivergence(const Value *V); + unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 99a1633..90ab7a5 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, Inst = TmpInst; break; } + case PPC::RLWINMbm: + case PPC::RLWINMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWIMIbm: + case PPC::RLWIMIobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); // The tied operand. + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } + case PPC::RLWNMbm: + case PPC::RLWNMobm: { + unsigned MB, ME; + int64_t BM = Inst.getOperand(3).getImm(); + if (!isRunOfOnes(BM, MB, ME)) + break; + + MCInst TmpInst; + TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? 
PPC::RLWNM : PPC::RLWNMo); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(MCOperand::CreateImm(MB)); + TmpInst.addOperand(MCOperand::CreateImm(ME)); + Inst = TmpInst; + break; + } } } diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index a9f5fc7..5cbf3d9 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler { public: PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~PPCDisassembler() {} + ~PPCDisassembler() override {} DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 311a4f2..1576544 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8718743..eca37eb 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -32,7 +32,8 @@ public: } void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index bea88a2..420c5c8 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -208,7 +208,7 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCMachObjectWriter( OS, @@ -224,8 +224,7 @@ namespace { ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) : PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index b817394..3e3489f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, } } -MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, bool IsLittleEndian, uint8_t OSABI) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b9f0afb..725b47b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -44,7 +44,7 @@ public: : MCII(mcii), CTX(ctx), IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {} - ~PPCMCCodeEmitter() {} + ~PPCMCCodeEmitter() override {} unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2f7a768..423e427 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { return new PPCTargetMachOStreamer(S); } -static MCInstPrinter *createPPCMCInstPrinter(const Target &T, +static MCInstPrinter *createPPCMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin(); - return new PPCInstPrinter(MAI, MII, MRI, isDarwin); + const MCRegisterInfo &MRI) { + return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin()); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 8b1e3b4..5f2117c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -18,6 +18,7 @@ #undef PPC #include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" namespace llvm { class MCAsmBackend; @@ -29,6 +30,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class 
raw_pwrite_stream;
 class raw_ostream;
 
 extern Target ThePPC32Target;
@@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
 MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                   StringRef TT, StringRef CPU);
 
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
-                                         bool Is64Bit,
-                                         bool IsLittleEndian,
-                                         uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+                                         bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
                                           uint32_t CPUType,
                                           uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+  if (!Val)
+    return false;
+
+  if (isShiftedMask_32(Val)) {
+    // look for the first non-zero bit
+    MB = countLeadingZeros(Val);
+    // look for the first zero bit after the run of ones
+    ME = countLeadingZeros((Val - 1) ^ Val);
+    return true;
+  } else {
+    Val = ~Val; // invert mask
+    if (isShiftedMask_32(Val)) {
+      // effectively look for the first zero bit
+      ME = countLeadingZeros(Val) - 1;
+      // effectively look for the first one bit after the run of zeros
+      MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+      return true;
+    }
+  }
+  // no run present
+  return false;
+}
+
 } // End llvm namespace
 
 // Generated files will use "namespace PPC".
To avoid symbol clash, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index f7259b9..44e69b7 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter( new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS, diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f175f6d..1a02bcc 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", "Enable the popcnt[dw] instructions">; +def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true", + "Enable the bpermd instruction">; +def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true", + "Enable extended divide instructions">; def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", "Enable the ldbrx instruction">; def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true", @@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true", def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; +def FeatureDirectMove : + SubtargetFeature<"direct-move", "HasDirectMove", "true", + "Enable Power8 direct move instructions", + [FeatureVSX]>; def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", "HasPartwordAtomics", "true", "Enable l[bh]arx and st[bh]cx.">; @@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true", def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; +/* Since new processors generally contain a superset of features of those that + came before them, the idea is to make implementations of new processors + less error prone and easier to read. + Namely: + list<SubtargetFeature> Power8FeatureList = ... + list<SubtargetFeature> FutureProcessorSpecificFeatureList = + [ features that Power8 does not support ] + list<SubtargetFeature> FutureProcessorFeatureList = + !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) + + Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as + well as providing a single point of definition if the feature set will be + used elsewhere. 
+*/ +def ProcessorFeatures { + list<SubtargetFeature> Power7FeatureList = + [DirectivePwr7, FeatureAltivec, FeatureVSX, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + FeatureBPERMD, FeatureExtDiv, + DeprecatedMFTB, DeprecatedDST]; + list<SubtargetFeature> Power8SpecificFeatures = + [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; + list<SubtargetFeature> Power8FeatureList = + !listconcat(Power7FeatureList, Power8SpecificFeatures); +} + // Note: Future features to add when support is extended to more // recent ISA levels: // @@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, FeatureFRES, FeatureFRSQRTE]>; -/* Since new processors generally contain a superset of features of those that - came before them, the idea is to make implementations of new processors - less error prone and easier to read. - Namely: - list<SubtargetFeature> Power8FeatureList = ... - list<SubtargetFeature> FutureProcessorSpecificFeatureList = - [ features that Power8 does not support ] - list<SubtargetFeature> FutureProcessorFeatureList = - !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList) - - Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as - well as providing a single point of definition if the feature set will be - used elsewhere. - -*/ -def ProcessorFeatures { - list<SubtargetFeature> Power8FeatureList = - [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX, - FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, - FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, - Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; -} - def : ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, @@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB, FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; -def : ProcessorModel<"pwr7", P7Model, - [DirectivePwr7, FeatureAltivec, FeatureVSX, - FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, - FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, - FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, - FeatureFPRND, FeatureFPCVT, FeatureISEL, - FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, - DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index cd60906..383a1e2 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } - MachineModuleInfoELF &MMIELF = - 
MMI->getObjFileInfo<MachineModuleInfoELF>(); - - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), - OutContext), - isPPC64 ? 8 : 4/*size*/); - } - - Stubs.clear(); - OutStreamer.AddBlankLine(); - } - return AsmPrinter::doFinalization(M); } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index fbd7b6d..002616b 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, } // Attempt to fast-select an integer-to-floating-point conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { MVT DstVT; Type *DstTy = I->getType(); @@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, } // Attempt to fast-select a floating-point-to-integer conversion. +// FIXME: Once fast-isel has better support for VSX, conversions using +// direct moves should be implemented. bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { MVT DstVT, SrcVT; Type *DstTy = I->getType(); @@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && RetVT != MVT::i8) return false; + else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits()) + // We can't handle boolean returns when CR bits are in use. + return false; // FIXME: No multi-register return values yet. if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 && diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3ac8e94..4f8d01b 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -105,13 +105,6 @@ namespace { return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } - /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s - /// with any number of 0s on either side. The 1s are allowed to wrap from - /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. - /// 0x0F0F0000 is not, since all 1s are not contiguous. - static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME); - - /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. 
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, @@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { getSmallIPtrImm(Offset)); } -bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { - if (!Val) - return false; - - if (isShiftedMask_32(Val)) { - // look for the first non-zero bit - MB = countLeadingZeros(Val); - // look for the first zero bit after the run of ones - ME = countLeadingZeros((Val - 1) ^ Val); - return true; - } else { - Val = ~Val; // invert mask - if (isShiftedMask_32(Val)) { - // effectively look for the first zero bit - ME = countLeadingZeros(Val) - 1; - // effectively look for the first one bit after the run of zeros - MB = countLeadingZeros((Val - 1) ^ Val) + 1; - return true; - } - } - // no run present - return false; -} - bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 871531e..4c0b6a6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; + case PPCISD::MFVSR: return "PPCISD::MFVSR"; + case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; + case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -/// isAllNegativeZeroVector - Returns true if all elements of build_vector -/// are -0.0. -bool PPC::isAllNegativeZeroVector(SDNode *N) { - BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); - - APInt APVal, APUndef; - unsigned BitSize; - bool HasAnyUndefs; - - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) - return CFP->getValueAPF().isNegZero(); - - return false; -} - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, MVT::i32), 8, false, true, + DAG.getConstant(12, MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(), + false, false, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, RLI.MPI = MPI; } +/// \brief Custom lowers floating point to integer conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. 
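// Editorial sketch, not part of the patch (instruction pairing assumed from
// the ISA, not stated in the diff): without direct moves, an f64->i32
// conversion round-trips through memory, roughly
//   fctiwz f0, f1 ; stfiwx f0, 0, r4 ; lwz r3, 0(r4)
// whereas with the ISA 2.07 direct moves the store/load pair collapses to a
// register-to-register move, roughly
//   fctiwz f0, f1 ; mfvsrwz r3, vs0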
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + SDValue Tmp; + switch (Op.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); + case MVT::i32: + Tmp = DAG.getNode( + Op.getOpcode() == ISD::FP_TO_SINT + ? PPCISD::FCTIWZ + : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); + break; + case MVT::i64: + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); + Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); + break; + } + return Tmp; +} + SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const { + if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) + return LowerFP_TO_INTDirectMove(Op, DAG, dl); + ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); @@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } +/// \brief Custom lowers integer to floating point conversions to use +/// the direct move instructions available in ISA 2.07 to avoid the +/// need for load/store combinations. +SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, + SelectionDAG &DAG, + SDLoc dl) const { + assert((Op.getValueType() == MVT::f32 || + Op.getValueType() == MVT::f64) && + "Invalid floating point type as target of conversion"); + assert(Subtarget.hasFPCVT() && + "Int to FP conversions with direct moves require FPCVT"); + SDValue FP; + SDValue Src = Op.getOperand(0); + bool SinglePrec = Op.getValueType() == MVT::f32; + bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; + bool Signed = Op.getOpcode() == ISD::SINT_TO_FP; + unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) : + (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU); + + if (WordInt) { + FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ, + dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + else { + FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src); + FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP); + } + + return FP; +} + SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, DAG.getConstantFP(1.0, Op.getValueType()), DAG.getConstantFP(0.0, Op.getValueType())); + // If we have direct moves, we can do all the conversion, skip the store/load + // however, without FPCVT we can't do most conversions. + if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT()) + return LowerINT_TO_FPDirectMove(Op, DAG, dl); + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); @@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; if (! 
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs, 0, true) || SplatBitSize > 32) + HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || + SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } - // The remaining cases assume either big endian element order or - // a splat-size that equates to the element size of the vector - // to be built. An example that doesn't work for little endian is - // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits - // and a vector element size of 16 bits. The code below will - // produce the vector in big endian element order, which for little - // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. - - // For now, just avoid these optimizations in that case. - // FIXME: Develop correct optimizations for LE with mismatched - // splat and element sizes. - - if (Subtarget.isLittleEndian() && - SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) - return SDValue(); - // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(0).getValueType() == MVT::ppcf128) return; @@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); - // When expanding a memset, require at least two QPX instructions to cover - // the cost of loading the value to be stored from the constant pool. - if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && - (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { - return MVT::v4f64; - } - - // We should use Altivec/VSX loads and stores when available. For unaligned - // addresses, unaligned VSX loads are only fast starting with the P8. - if (Subtarget.hasAltivec() && Size >= 16 && - (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || - ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) - return MVT::v4i32; + if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { + const Function *F = MF.getFunction(); + // When expanding a memset, require at least two QPX instructions to cover + // the cost of loading the value to be stored from the constant pool. + if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) && + (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) && + !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + return MVT::v4f64; + } + + // We should use Altivec/VSX loads and stores when available. For unaligned + // addresses, unaligned VSX loads are only fast starting with the P8. 
+ if (Subtarget.hasAltivec() && Size >= 16 && + (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) || + ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) + return MVT::v4i32; + } if (Subtarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 8afd7ef..7e2ebd4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -119,6 +119,15 @@ namespace llvm { /// resultant GPR. Bits corresponding to other CR regs are undefined. MFOCRF, + /// Direct move from a VSX register to a GPR + MFVSR, + + /// Direct move from a GPR to a VSX register (algebraic) + MTVSRA, + + /// Direct move from a GPR to a VSX register (zero) + MTVSRZ, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to @@ -368,10 +377,6 @@ namespace llvm { /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - /// isAllNegativeZeroVector - Returns true if all elements of build_vector - /// are -0.0. - bool isAllNegativeZeroVector(SDNode *N); - /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); @@ -649,6 +654,10 @@ namespace llvm { void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, SDLoc dl) const; + SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; + SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, + SDLoc dl) const; SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 183d088..d1d67cb 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS), "popcntd $rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctpop i64:$rS))]>; +def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), + "bpermd $rA, $rS, $rB", IIC_IntGeneral, + [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>, + isPPC64, Requires<[HasBPERMD]>; let isCodeGenOnly = 1, isCommutable = 1 in def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB), @@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS), "popcntw $rA, $rS", IIC_IntGeneral, [(set i32:$rA, (ctpop i32:$rS))]>; -defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divd", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), - "divdu", "$rT, $rA, $rB", IIC_IntDivD, - [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divd", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64; +defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdu", "$rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, 
isPPC64; +def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divde. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; +def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu $rT, $rA, $rB", IIC_IntDivD, + [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>, + isPPC64, Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "divdeu. $rT, $rA, $rB", IIC_IntDivD, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + isPPC64, Requires<[HasExtDiv]>; let isCommutable = 1 in defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "mulld", "$rT, $rA, $rB", IIC_IntMulHD, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index b7a7a1f..43c2158 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = XT{5}; } +class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, list<dag> pattern> + : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> { + let B = 0; +} + class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 5eff156..8aecb65 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; +def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">; +def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, } } +// Multiclass for instructions for which the non-record form is not cracked +// and the record form is cracked (e.g. divw, mullw, etc.) +multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, + string asmbase, string asmstr, InstrItinClass itin, + list<dag> pattern> { + let BaseName = asmbase in { + def NAME : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(" ", asmstr)), itin, + pattern>, RecFormRel; + let Defs = [CR0] in + def o : XOForm_1<opcode, xo, oe, OOL, IOL, + !strconcat(asmbase, !strconcat(". 
", asmstr)), itin, + []>, isDOT, RecFormRel, PPC970_DGroup_First, + PPC970_DGroup_Cracked; + } +} + multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, list<dag> pattern> { @@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; -defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divw", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; -defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), - "divwu", "$rT, $rA, $rB", IIC_IntDivW, - [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, - PPC970_DGroup_First, PPC970_DGroup_Cracked; +defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divw", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>; +defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwu", "$rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>; +def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divwe. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; +def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>, + Requires<[HasExtDiv]>; +let Defs = [CR0] in +def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "divweu. $rT, $rA, $rB", IIC_IntDivW, + []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First, + Requires<[HasExtDiv]>; let isCommutable = 1 in { defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), "mulhw", "$rT, $rA, $rB", IIC_IntMulHW, @@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0) def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; +def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b", + (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>; + // These generic branch instruction forms are used for the assembler parser only. // Defs and Uses are conservative, since we don't know the BO value. 
let PPC970_Unit = 7 in { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ec04da4..a98e58f 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; +def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; +def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; +def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, @@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), when the elements are larger than i32. */ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">; +def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">; let Predicates = [HasP8Vector] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let isCommutable = 1 in { @@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170, [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>; } // AddedComplexity = 500 } // HasP8Vector + +let Predicates = [HasDirectMove, HasVSX] in { +// VSX direct move instructions +def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT), + "mfvsrd $rA, $XT", IIC_VecGeneral, + [(set i64:$rA, (PPCmfvsr f64:$XT))]>, + Requires<[In64BitMode]>; +def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT), + "mfvsrwz $rA, $XT", IIC_VecGeneral, + [(set i32:$rA, (PPCmfvsr f64:$XT))]>; +def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA), + "mtvsrd $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i64:$rA))]>, + Requires<[In64BitMode]>; +def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwa $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsra i32:$rA))]>; +def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA), + "mtvsrwz $XT, $rA", IIC_VecGeneral, + [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; +} // HasDirectMove, HasVSX diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index 005bcaf..2947c66 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "ppc-loop-data-prefetch" #include "PPC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" @@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index 092a4ef..b6e7799 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "ppc-loop-preinc-prep" #include "PPC.h" #include "PPCTargetMachine.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include 
"llvm/ADT/Statistic.h" @@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); - I != E; ++I) { - Loop *L = *I; - MadeChange |= runOnLoop(L); - } + for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) + for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) + MadeChange |= runOnLoop(*L); return MadeChange; } @@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!L->empty()) return MadeChange; + DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); + BasicBlock *Header = L->getHeader(); const PPCSubtarget *ST = TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr; - unsigned HeaderLoopPredCount = 0; - for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) { - ++HeaderLoopPredCount; - } + unsigned HeaderLoopPredCount = + std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket; @@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (L->isLoopInvariant(PtrValue)) continue; - const SCEV *LSCEV = SE->getSCEV(PtrValue); - if (!isa<SCEVAddRecExpr>(LSCEV)) + const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); + if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) { + if (LARSCEV->getLoop() != L) + continue; + } else { continue; + } bool FoundBucket = false; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) @@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // returns a value (which might contribute to determining the loop's // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || - !LoopPredecessor->getTerminator()->getType()->isVoidTy()) + !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { LoopPredecessor = InsertPreheaderForLoop(L, this); + if (LoopPredecessor) + MadeChange = true; + } if (!LoopPredecessor) return MadeChange; + DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n"); + SmallSet<BasicBlock *, 16> BBChanged; for (unsigned i = 0, e = Buckets.size(); i != e; ++i) { // The base address of each bucket is transformed into a phi and the others @@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!BasePtrSCEV->isAffine()) continue; + DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); + assert(BasePtrSCEV->getLoop() == L && + "AddRec for the wrong loop?"); + Instruction *MemI = Buckets[i].begin()->second; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { if (!isSafeToExpand(BasePtrStartSCEV, *SE)) continue; + DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); + PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount, MemI->hasName() ? 
MemI->getName() + ".phi" : "", Header->getFirstNonPHI()); diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 0965cb3..6df89fe 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ unsigned OrigLen = Name.size() - PrefixLen; Name += Suffix; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen); // If the target flags on the operand changes the name of the symbol, do that diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index ed88803..f313b0a 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -21,7 +21,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetMachine.h" #include <cstdlib> @@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() { HasFPCVT = false; HasISEL = false; HasPOPCNTD = false; + HasBPERMD = false; + HasExtDiv = false; HasCMPB = false; HasLDBRX = false; IsBookE = false; @@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() { HasICBT = false; HasInvariantFunctionDescriptors = false; HasPartwordAtomics = false; + HasDirectMove = false; IsQPXStackUnaligned = false; HasHTM = false; } @@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { else CPUName = "generic"; } -#if (defined(__APPLE__) || defined(__linux__)) && \ - (defined(__ppc__) || defined(__powerpc__)) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUName); diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index b4c1bb1..8d95508 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -101,6 +101,8 @@ protected: bool HasFPCVT; bool HasISEL; bool HasPOPCNTD; + bool HasBPERMD; + bool HasExtDiv; bool HasCMPB; bool HasLDBRX; bool IsBookE; @@ -115,6 +117,7 @@ protected: bool HasICBT; bool HasInvariantFunctionDescriptors; bool HasPartwordAtomics; + bool HasDirectMove; bool HasHTM; /// When targeting QPX running a stock PPC64 Linux kernel where the stack @@ -225,6 +228,8 @@ public: bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasBPERMD() const { return HasBPERMD; } + bool hasExtDiv() const { return HasExtDiv; } bool hasCMPB() const { return HasCMPB; } bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } @@ -239,6 +244,7 @@ public: return HasInvariantFunctionDescriptors; } bool hasPartwordAtomics() const { return HasPartwordAtomics; } + bool hasDirectMove() const { return HasDirectMove; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h index 6493713..8aaf5e1 100644 --- a/lib/Target/PowerPC/PPCTargetStreamer.h +++ b/lib/Target/PowerPC/PPCTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class PPCTargetStreamer : public MCTargetStreamer { public: PPCTargetStreamer(MCStreamer &S); - virtual ~PPCTargetStreamer(); + ~PPCTargetStreamer() override; virtual void emitTCEntry(const MCSymbol &S) = 0; virtual void emitMachine(StringRef CPU) = 0; virtual void emitAbiVersion(int AbiVersion) = 0; diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index dfe988f..01233ae 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -622,6 +622,25 @@ void foo() { __asm__("" ::: "cr2"); } +//===-------------------------------------------------------------------------=== +The naming convention for instruction formats is very haphazard. +We have agreed on a naming scheme as follows: + +<INST_form>{_<OP_type><OP_len>}+ + +Where: +INST_form is the instruction format (X-form, etc.) +OP_type is the operand type - one of OPC (opcode), RD (register destination), + RS (register source), + RDp (destination register pair), + RSp (source register pair), IM (immediate), + XO (extended opcode) +OP_len is the length of the operand in bits + +VSX register operands would be of length 6 (split across two fields), +condition register fields of length 3. +We would not need to denote reserved fields in names of instruction formats. + //===----------------------------------------------------------------------===// Instruction fusion was introduced in ISA 2.06 and more opportunities added in diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt index 43d87d3..1d5b092 100644 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ b/lib/Target/PowerPC/README_ALTIVEC.txt @@ -277,7 +277,7 @@ This will generate the following instruction sequence: This will almost certainly cause a load-hit-store hazard. Since val is a value parameter, it should not need to be saved onto the stack, unless it's being done to set up the vector register. 
Instead, -it would be better to splat teh value into a vector register, and then +it would be better to splat the value into a vector register, and then remove the (dead) stores to the stack. //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index e5d5ce2..2eb805e 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< !cast<string>(Value), "The size of local memory in bytes">; +def FeatureGCN : SubtargetFeature<"gcn", + "IsGCN", + "true", + "GCN or newer GPU">; + +def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding", + "GCN1Encoding", + "true", + "Encoding format for SI and CI">; + +def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", + "GCN3Encoding", + "true", + "Encoding format for VI">; class SubtargetFeatureGeneration <string Value, list<SubtargetFeature> Implies> : SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value, @@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768, - FeatureWavefrontSize64]>; + FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace]>; + FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace, + FeatureGCN1Encoding]>; def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536, - FeatureWavefrontSize64, FeatureFlatAddressSpace]>; + FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, + FeatureGCN3Encoding]>; //===----------------------------------------------------------------------===// @@ -197,8 +213,10 @@ def NullALU : InstrItinClass; class PredicateControl { Predicate SubtargetPredicate; + list<Predicate> AssemblerPredicates = []; list<Predicate> OtherPredicates = []; list<Predicate> Predicates = !listconcat([SubtargetPredicate], + AssemblerPredicates, OtherPredicates); } diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index d911014..b3480b4 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -17,6 +17,7 @@ // #include "AMDGPUAsmPrinter.h" +#include "InstPrinter/AMDGPUInstPrinter.h" #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "AMDGPUSubtarget.h" @@ -574,3 +575,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header))); } + +bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. 
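The PrintAsmOperand override that begins above follows AsmPrinter's usual contract: return true when an inline-asm operand modifier cannot be handled, and false once the operand text has been emitted. Reduced to a standalone sketch (printReg is a hypothetical callback standing in for the real instruction printer):

// 'ExtraCode' is the optional single-letter modifier from the inline-asm
// template; returning true tells the caller the modifier was not handled.
bool printAsmOperandSketch(unsigned Reg, const char *ExtraCode,
                           void (*printReg)(unsigned)) {
  if (ExtraCode && ExtraCode[0]) {
    if (ExtraCode[1] != '\0')
      return true;           // multi-letter modifiers are unsupported
    switch (ExtraCode[0]) {
    default:
      return true;           // unknown; the real code defers to AsmPrinter
    case 'r':                // 'r' explicitly requests the register name
      break;
    }
  }
  printReg(Reg);             // default: print the register operand
  return false;
}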
+ + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + case 'r': + break; + } + } + + AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O, + *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo()); + return false; +} diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h index 58ffb1e..1acff3a 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -99,6 +99,10 @@ public: void EmitEndOfAsmFile(Module &M) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + protected: std::vector<std::string> DisasmLines, HexLines; size_t DisasmLineMaxLen; diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 7341cd9..def252a 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -345,7 +345,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned NOps = N->getNumOperands(); for (unsigned i = 0; i < NOps; i++) { // XXX: Why is this here? - if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { + if (isa<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; break; } diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 62a33fa..7c5235d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -126,6 +126,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Custom); setOperationAction(ISD::FROUND, MVT::f64, Custom); @@ -1685,14 +1687,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, const unsigned bitPos = halfBitWidth - i - 1; SDValue POS = DAG.getConstant(bitPos, HalfVT); // Get value of high bit - // TODO: Remove the BFE part when the optimization is fixed - SDValue HBit; - if (halfBitWidth == 32 && Subtarget->hasBFE()) { - HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); - } else { - HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); - HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); - } + SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); // Shift diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 4d08201..eeb7f3f 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -358,7 +358,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>; def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; @@ -389,7 +389,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr), def mskor_flat : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS; + return cast<MemSDNode>(N)->getAddressSpace() == 
AMDGPUAS::FLAT_ADDRESS; }]>; class global_binary_atomic_op<SDNode atomic_op> : PatFrag< diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index f047ed0..7e274a9 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -124,7 +124,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *MF->getSubtarget().getInstrInfo(), *MF->getSubtarget().getRegisterInfo()); - InstPrinter.printInst(&TmpInst, DisasmStream, StringRef()); + InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(), + MF->getSubtarget()); // Disassemble instruction/operands to hex representation. SmallVector<MCFixup, 4> Fixups; diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp index 175dcd8..6d5f94e 100644 --- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -366,8 +366,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { Function *F = Call->getCalledFunction(); FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, F->isVarArg()); - Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType, - F->getAttributes()); + Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(), + NewType, F->getAttributes()); Function *NewF = cast<Function>(C); Call->setCalledFunction(NewF); continue; diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index 0ead652..259224a 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), + IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 403a3e4..aeb0817 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -71,6 +71,9 @@ private: int LocalMemorySize; bool EnableVGPRSpilling; bool SGPRInitBug; + bool IsGCN; + bool GCN1Encoding; + bool GCN3Encoding; AMDGPUFrameLowering FrameLowering; std::unique_ptr<AMDGPUTargetLowering> TLInfo; diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index ee6551b..c9b25a1 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -623,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); ++It) { MachineInstr *instr = &(*It); - if (!instr->getDebugLoc().isUnknown()) + if (instr->getDebugLoc()) DL = instr->getDebugLoc(); } return DL; diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp index 49f0f23..aaf9b32 100644 --- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" 
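The dyn_cast-to-cast cleanups in the PatFrags above follow the standard rule for LLVM-style RTTI: isa<T> only tests, cast<T> asserts and never returns null, and dyn_cast<T> returns null on mismatch, so dereferencing dyn_cast's result unconditionally (as the old predicates did) silently relies on the cast always succeeding. A self-contained illustration using the classof machinery from llvm/Support/Casting.h:

#include "llvm/Support/Casting.h"

struct Node {
  enum NodeKind { NK_Mem, NK_Other };
  NodeKind Kind;
  Node(NodeKind K) : Kind(K) {}
};

struct MemNode : Node {
  MemNode() : Node(NK_Mem) {}
  unsigned getAddressSpace() const { return 0; }
  static bool classof(const Node *N) { return N->Kind == NK_Mem; }
};

unsigned addrSpaceOf(const Node *N) {
  // cast<> is the right tool once the type is already known to be correct:
  if (llvm::isa<MemNode>(N))
    return llvm::cast<MemNode>(N)->getAddressSpace();
  // dyn_cast<> is for genuinely conditional casts; it yields null here.
  const MemNode *M = llvm::dyn_cast<MemNode>(N);
  return M ? M->getAddressSpace() : ~0u;
}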
#include "llvm/ADT/STLExtras.h" @@ -27,76 +29,105 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; namespace { -class AMDGPUAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; - MCAsmParser &Parser; - - - /// @name Auto-generated Match Functions - /// { - -#define GET_ASSEMBLER_HEADER -#include "AMDGPUGenAsmMatcher.inc" - - /// } - -public: - AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, - const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI), Parser(Parser) { - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - } - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) override; - bool ParseDirective(AsmToken DirectiveID) override; - OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands) override; - - bool parseCnt(int64_t &IntVal); - OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); -}; +struct OptionalOperand; class AMDGPUOperand : public MCParsedAsmOperand { enum KindTy { Token, - Immediate + Immediate, + Register, + Expression } Kind; + SMLoc StartLoc, EndLoc; + public: AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {} + MCContext *Ctx; + + enum ImmTy { + ImmTyNone, + ImmTyDSOffset0, + ImmTyDSOffset1, + ImmTyGDS, + ImmTyOffset, + ImmTyGLC, + ImmTySLC, + ImmTyTFE, + ImmTyClamp, + ImmTyOMod + }; + struct TokOp { const char *Data; unsigned Length; }; struct ImmOp { + bool IsFPImm; + ImmTy Type; int64_t Val; }; + struct RegOp { + unsigned RegNo; + int Modifiers; + const MCRegisterInfo *TRI; + }; + union { TokOp Tok; ImmOp Imm; + RegOp Reg; + const MCExpr *Expr; }; void addImmOperands(MCInst &Inst, unsigned N) const { Inst.addOperand(MCOperand::CreateImm(getImm())); } - void addRegOperands(MCInst &Inst, unsigned N) const { - llvm_unreachable("addRegOperands"); - } + StringRef getToken() const { return StringRef(Tok.Data, Tok.Length); } + + void addRegOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addRegOrImmOperands(MCInst &Inst, unsigned N) const { + if (isReg()) + addRegOperands(Inst, N); + else + addImmOperands(Inst, N); + } + + void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { + Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers)); + addRegOperands(Inst, N); + } + + void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { + if (isImm()) + addImmOperands(Inst, N); + else { + assert(isExpr()); + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + } + + bool defaultTokenHasSuffix() const { + StringRef Token(Tok.Data, Tok.Length); + + return Token.endswith("_e32") || Token.endswith("_e64"); + } + bool isToken() const override { return Kind == Token; } @@ -105,52 +136,369 @@ public: return Kind == Immediate; } + bool isInlineImm() const { + float F = BitsToFloat(Imm.Val); + // TODO: Add 0.5pi for VI + return isImm() && ((Imm.Val <= 64 && Imm.Val >= -16) || + (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 || + F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0)); + } + + bool isDSOffset0() const { + assert(isImm()); + return Imm.Type == ImmTyDSOffset0; + } + + bool 
isDSOffset1() const { + assert(isImm()); + return Imm.Type == ImmTyDSOffset1; + } + int64_t getImm() const { return Imm.Val; } + enum ImmTy getImmTy() const { + assert(isImm()); + return Imm.Type; + } + bool isReg() const override { - return false; + return Kind == Register && Reg.Modifiers == -1; + } + + bool isRegWithInputMods() const { + return Kind == Register && Reg.Modifiers != -1; + } + + void setModifiers(unsigned Mods) { + assert(isReg()); + Reg.Modifiers = Mods; } unsigned getReg() const override { - return 0; + return Reg.RegNo; + } + + bool isRegOrImm() const { + return isReg() || isImm(); + } + + bool isRegClass(unsigned RCID) const { + return Reg.TRI->getRegClass(RCID).contains(getReg()); + } + + bool isSCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + } + + bool isSSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID)); + } + + bool isSSrc64() const { + return isImm() || isInlineImm() || + (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)); + } + + bool isVCSrc32() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVCSrc64() const { + return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); + } + + bool isVSrc32() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID)); + } + + bool isVSrc64() const { + return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID)); } bool isMem() const override { return false; } + bool isExpr() const { + return Kind == Expression; + } + + bool isSoppBrTarget() const { + return isExpr() || isImm(); + } + SMLoc getStartLoc() const override { - return SMLoc(); + return StartLoc; } SMLoc getEndLoc() const override { - return SMLoc(); + return EndLoc; } void print(raw_ostream &OS) const override { } - static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) { + static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc, + enum ImmTy Type = ImmTyNone, + bool IsFPImm = false) { auto Op = llvm::make_unique<AMDGPUOperand>(Immediate); Op->Imm.Val = Val; + Op->Imm.IsFPImm = IsFPImm; + Op->Imm.Type = Type; + Op->StartLoc = Loc; + Op->EndLoc = Loc; return Op; } - static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) { + static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc, + bool HasExplicitEncodingSize = true) { auto Res = llvm::make_unique<AMDGPUOperand>(Token); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); + Res->StartLoc = Loc; + Res->EndLoc = Loc; return Res; } + static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S, + SMLoc E, + const MCRegisterInfo *TRI) { + auto Op = llvm::make_unique<AMDGPUOperand>(Register); + Op->Reg.RegNo = RegNo; + Op->Reg.TRI = TRI; + Op->Reg.Modifiers = -1; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) { + auto Op = llvm::make_unique<AMDGPUOperand>(Expression); + Op->Expr = Expr; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + bool isDSOffset() const; + bool isDSOffset01() const; bool isSWaitCnt() const; + bool isMubufOffset() const; }; +class AMDGPUAsmParser : public MCTargetAsmParser { + MCSubtargetInfo &STI; + const MCInstrInfo &MII; + MCAsmParser &Parser; + + unsigned ForcedEncodingSize; + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "AMDGPUGenAsmMatcher.inc" + + /// } + +public: + 
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser, + const MCInstrInfo &MII, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser), + ForcedEncodingSize(0){ + + if (!STI.getFeatureBits()) { + // Set default features. + STI.ToggleFeature("SOUTHERN_ISLANDS"); + } + + setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + } + + unsigned getForcedEncodingSize() const { + return ForcedEncodingSize; + } + + void setForcedEncodingSize(unsigned Size) { + ForcedEncodingSize = Size; + } + + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + unsigned checkTargetMatchPredicate(MCInst &Inst) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool ParseDirective(AsmToken DirectiveID) override; + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int, + int64_t Default = 0); + OperandMatchResultTy parseIntWithPrefix(const char *Prefix, + OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy = + AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy = + AMDGPUOperand::ImmTyNone); + OperandMatchResultTy parseOptionalOps( + const ArrayRef<OptionalOperand> &OptionalOps, + OperandVector &Operands); + + + void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); + void cvtDS(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands); + + bool parseCnt(int64_t &IntVal); + OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); + OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + + void cvtMubuf(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseOffset(OperandVector &Operands); + OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseGLC(OperandVector &Operands); + OperandMatchResultTy parseSLC(OperandVector &Operands); + OperandMatchResultTy parseTFE(OperandVector &Operands); + + OperandMatchResultTy parseDMask(OperandVector &Operands); + OperandMatchResultTy parseUNorm(OperandVector &Operands); + OperandMatchResultTy parseR128(OperandVector &Operands); + + void cvtVOP3(MCInst &Inst, const OperandVector &Operands); + OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands); +}; + +struct OptionalOperand { + const char *Name; + AMDGPUOperand::ImmTy Type; + bool IsBit; + int64_t Default; + bool (*ConvertResult)(int64_t&); +}; + +} + +static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) { + if (IsVgpr) { + switch (RegWidth) { + default: llvm_unreachable("Unknown register width"); + case 1: return AMDGPU::VGPR_32RegClassID; + case 2: return AMDGPU::VReg_64RegClassID; + case 3: return AMDGPU::VReg_96RegClassID; + case 4: return AMDGPU::VReg_128RegClassID; + case 8: return AMDGPU::VReg_256RegClassID; + case 16: return AMDGPU::VReg_512RegClassID; + } + } + + switch (RegWidth) { + default: llvm_unreachable("Unknown register width"); + case 1: return AMDGPU::SGPR_32RegClassID; + case 2: 
return AMDGPU::SGPR_64RegClassID; + case 4: return AMDGPU::SReg_128RegClassID; + case 8: return AMDGPU::SReg_256RegClassID; + case 16: return AMDGPU::SReg_512RegClassID; + } +} + +static unsigned getRegForName(const StringRef &RegName) { + + return StringSwitch<unsigned>(RegName) + .Case("exec", AMDGPU::EXEC) + .Case("vcc", AMDGPU::VCC) + .Case("flat_scr", AMDGPU::FLAT_SCR) + .Case("m0", AMDGPU::M0) + .Case("scc", AMDGPU::SCC) + .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO) + .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI) + .Case("vcc_lo", AMDGPU::VCC_LO) + .Case("vcc_hi", AMDGPU::VCC_HI) + .Case("exec_lo", AMDGPU::EXEC_LO) + .Case("exec_hi", AMDGPU::EXEC_HI) + .Default(0); } bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - return true; + const AsmToken Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + const StringRef &RegName = Tok.getString(); + RegNo = getRegForName(RegName); + + if (RegNo) { + Parser.Lex(); + return false; + } + + // Match vgprs and sgprs + if (RegName[0] != 's' && RegName[0] != 'v') + return true; + + bool IsVgpr = RegName[0] == 'v'; + unsigned RegWidth; + unsigned RegIndexInClass; + if (RegName.size() > 1) { + // We have a 32-bit register + RegWidth = 1; + if (RegName.substr(1).getAsInteger(10, RegIndexInClass)) + return true; + Parser.Lex(); + } else { + // We have a register greater than 32-bits. + + int64_t RegLo, RegHi; + Parser.Lex(); + if (getLexer().isNot(AsmToken::LBrac)) + return true; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(RegLo)) + return true; + + if (getLexer().isNot(AsmToken::Colon)) + return true; + + Parser.Lex(); + if (getParser().parseAbsoluteExpression(RegHi)) + return true; + + if (getLexer().isNot(AsmToken::RBrac)) + return true; + + Parser.Lex(); + RegWidth = (RegHi - RegLo) + 1; + if (IsVgpr) { + // VGPR registers aren't aligned. + RegIndexInClass = RegLo; + } else { + // SGPR registers are aligned. Max alignment is 4 dwords. 
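The arithmetic around this comment is the one subtle step of register parsing: a range like s[lo:hi] or v[lo:hi] has width hi - lo + 1, a VGPR tuple starts at its low register directly, and SGPR tuples are allocated aligned, so the class-relative index divides the low register by the tuple alignment (capped at 4 dwords). The same mapping as a standalone sketch, with RegRef as an invented result type:

#include <algorithm>

struct RegRef {
  bool IsVgpr;
  unsigned Width;         // registers in the tuple: 1, 2, 4, 8, or 16
  unsigned IndexInClass;  // index within the matching register class
};

// Maps v[Lo:Hi] / s[Lo:Hi] to a class-relative index, mirroring the parser
// above: VGPR tuples are unaligned, SGPR tuples align to at most 4 dwords.
RegRef decodeRegRange(bool IsVgpr, unsigned Lo, unsigned Hi) {
  unsigned Width = Hi - Lo + 1;
  unsigned Index = IsVgpr ? Lo : Lo / std::min(Width, 4u);
  return {IsVgpr, Width, Index};
}
// Example: s[4:7] -> width 4, index 1, while v[4:7] -> width 4, index 4.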
+ RegIndexInClass = RegLo / std::min(RegWidth, 4u); + } + } + + const MCRegisterInfo *TRC = getContext().getRegisterInfo(); + unsigned RC = getRegClass(IsVgpr, RegWidth); + if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs()) + return true; + RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass); + return false; +} + +unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + + uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; + + if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || + (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) + return Match_InvalidOperand; + + return Match_Success; } @@ -162,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst Inst; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { - case Match_Success: - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); - return false; - case Match_MissingFeature: - return Error(IDLoc, "instruction use requires an option to be enabled"); - case Match_MnemonicFail: - return Error(IDLoc, "unrecognized instruction mnemonic"); - case Match_InvalidOperand: { - if (ErrorInfo != ~0ULL) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + default: break; + case Match_Success: + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, STI); + return false; + case Match_MissingFeature: + return Error(IDLoc, "missing feature"); + + case Match_MnemonicFail: + return Error(IDLoc, "unrecognized instruction mnemonic"); + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) { + return Error(IDLoc, "too few operands for instruction"); + } + ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + return Error(ErrorLoc, "invalid operand for instruction"); } - return Error(IDLoc, "invalid operand for instruction"); - } } llvm_unreachable("Implement any new match types added!"); } @@ -186,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { return true; } +static bool operandsHaveModifiers(const OperandVector &Operands) { + + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + const AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); + if (Op.isRegWithInputMods()) + return true; + if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod || + Op.getImmTy() == AMDGPUOperand::ImmTyClamp)) + return true; + } + return false; +} + AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { @@ -194,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // If we successfully parsed the operand or if there was an error parsing, // we are done. - if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) + // + // If we are parsing after we reach EndOfStatement then this means we + // are appending default values to the Operands list. This is only done + // by a custom parser, so we shouldn't continue on to the generic parsing. 
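checkTargetMatchPredicate, defined a little earlier, is what makes the _e32/_e64 suffixes binding: after the generic matcher has picked an opcode, the match is rejected when the requested encoding width disagrees with whether the opcode is a VOP3 encoding. The predicate reduces to a few lines; here the SIInstrFlags::VOP3 test is modeled by a plain boolean:

// ForcedSize is 0 (no suffix), 32 (_e32) or 64 (_e64). An _e64 suffix
// demands a VOP3 encoding; an _e32 suffix forbids one.
bool encodingSizeMatches(unsigned ForcedSize, bool IsVOP3) {
  if (ForcedSize == 32 && IsVOP3)
    return false;
  if (ForcedSize == 64 && !IsVOP3)
    return false;
  return true;  // no suffix, or suffix and encoding agree
}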
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || + getLexer().is(AsmToken::EndOfStatement)) return ResTy; + bool Negate = false, Abs = false; + if (getLexer().getKind()== AsmToken::Minus) { + Parser.Lex(); + Negate = true; + } + + if (getLexer().getKind() == AsmToken::Pipe) { + Parser.Lex(); + Abs = true; + } + switch(getLexer().getKind()) { case AsmToken::Integer: { + SMLoc S = Parser.getTok().getLoc(); int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - Operands.push_back(AMDGPUOperand::CreateImm(IntVal)); + APInt IntVal32(32, IntVal); + if (IntVal32.getSExtValue() != IntVal) { + Error(S, "invalid immediate: only 32-bit values are legal"); + return MatchOperand_ParseFail; + } + + IntVal = IntVal32.getSExtValue(); + if (Negate) + IntVal *= -1; + Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); return MatchOperand_Success; } + case AsmToken::Real: { + // FIXME: We should emit an error if a double-precision floating-point + // value is used. I'm not sure the best way to detect this. + SMLoc S = Parser.getTok().getLoc(); + int64_t IntVal; + if (getParser().parseAbsoluteExpression(IntVal)) + return MatchOperand_ParseFail; + + APFloat F((float)BitsToDouble(IntVal)); + if (Negate) + F.changeSign(); + Operands.push_back( + AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S)); + return MatchOperand_Success; + } + case AsmToken::Identifier: { + SMLoc S, E; + unsigned RegNo; + if (!ParseRegister(RegNo, S, E)) { + + bool HasModifiers = operandsHaveModifiers(Operands); + unsigned Modifiers = 0; + + if (Negate) + Modifiers |= 0x1; + + if (Abs) { + if (getLexer().getKind() != AsmToken::Pipe) + return MatchOperand_ParseFail; + Parser.Lex(); + Modifiers |= 0x2; + } + + if (Modifiers && !HasModifiers) { + // We are adding a modifier to src1 or src2 and previous sources + // don't have modifiers, so we need to go back and add empty modifiers + // for each previous source. + for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1; + --PrevRegIdx) { + + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]); + RegOp.setModifiers(0); + } + } + + + Operands.push_back(AMDGPUOperand::CreateReg( + RegNo, S, E, getContext().getRegisterInfo())); + + if (HasModifiers || Modifiers) { + AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]); + RegOp.setModifiers(Modifiers); + + } + } else { + Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(), + S)); + Parser.Lex(); + } + return MatchOperand_Success; + } default: return MatchOperand_NoMatch; } @@ -213,22 +669,282 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { + + // Clear any forced encodings from the previous instruction. + setForcedEncodingSize(0); + + if (Name.endswith("_e64")) + setForcedEncodingSize(64); + else if (Name.endswith("_e32")) + setForcedEncodingSize(32); + // Add the instruction mnemonic Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc)); - if (getLexer().is(AsmToken::EndOfStatement)) - return false; + while (!getLexer().is(AsmToken::EndOfStatement)) { + AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); + + // Eat the comma or space if there is one. 
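The immediate handling just parsed enforces two hardware rules: literals must fit in 32 bits (floats are stored by their 32-bit pattern), and the separate isInlineImm predicate earlier in the operand class decides whether a value can be encoded inline instead of occupying the literal slot, namely integers in [-16, 64] or one of a fixed set of float constants. A standalone version of that predicate (the 0.5*pi value noted in the TODO is omitted, as it is for pre-VI targets):

#include <cstdint>
#include <cstring>

// True when a 32-bit operand can use an inline constant encoding.
bool isInlineImm32(int64_t Val) {
  if (Val >= -16 && Val <= 64)
    return true;
  float F;
  uint32_t Bits = static_cast<uint32_t>(Val);
  std::memcpy(&F, &Bits, sizeof(F));  // reinterpret the low 32 bits
  return F == 0.0f || F == 0.5f || F == -0.5f || F == 1.0f || F == -1.0f ||
         F == 2.0f || F == -2.0f || F == 4.0f || F == -4.0f;
}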
+ if (getLexer().is(AsmToken::Comma)) + Parser.Lex(); - AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); - switch (Res) { - case MatchOperand_Success: return false; - case MatchOperand_ParseFail: return Error(NameLoc, - "Failed parsing operand"); - case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand"); + switch (Res) { + case MatchOperand_Success: break; + case MatchOperand_ParseFail: return Error(getLexer().getLoc(), + "failed parsing operand."); + case MatchOperand_NoMatch: return Error(getLexer().getLoc(), + "not a valid operand."); + } } - return true; + + // Once we reach end of statement, continue parsing so we can add default + // values for optional arguments. + AMDGPUAsmParser::OperandMatchResultTy Res; + while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) { + if (Res != MatchOperand_Success) + return Error(getLexer().getLoc(), "failed parsing operand."); + } + return false; +} + +//===----------------------------------------------------------------------===// +// Utility functions +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, + int64_t Default) { + + // We are at the end of the statement, and this is a default argument, so + // use a default value. + if (getLexer().is(AsmToken::EndOfStatement)) { + Int = Default; + return MatchOperand_Success; + } + + switch(getLexer().getKind()) { + default: return MatchOperand_NoMatch; + case AsmToken::Identifier: { + StringRef OffsetName = Parser.getTok().getString(); + if (!OffsetName.equals(Prefix)) + return MatchOperand_NoMatch; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Colon)) + return MatchOperand_ParseFail; + + Parser.Lex(); + if (getLexer().isNot(AsmToken::Integer)) + return MatchOperand_ParseFail; + + if (getParser().parseAbsoluteExpression(Int)) + return MatchOperand_ParseFail; + break; + } + } + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy) { + + SMLoc S = Parser.getTok().getLoc(); + int64_t Offset = 0; + + AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset); + if (Res != MatchOperand_Success) + return Res; + + Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy)); + return MatchOperand_Success; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, + enum AMDGPUOperand::ImmTy ImmTy) { + int64_t Bit = 0; + SMLoc S = Parser.getTok().getLoc(); + + // We are at the end of the statement, and this is a default argument, so + // use a default value. 
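parseIntWithPrefix and parseNamedBit, whose bodies surround this point, implement a small grammar for optional operands: either a name:integer pair such as offset:16, or a bare flag that can be negated with a no prefix (gds / nogds). Stripped of the MCAsmLexer plumbing, the same grammar can be shown on plain strings; the sketch names simply echo the methods above:

#include <cstdint>
#include <cstdlib>
#include <string>

// Parses "name:<int>" from Text. Returns false when the prefix is absent,
// mirroring MatchOperand_NoMatch (the caller then supplies the default).
bool parseIntWithPrefixSketch(const std::string &Text,
                              const std::string &Name, int64_t &Value) {
  if (Text.compare(0, Name.size(), Name) != 0 ||
      Text.size() <= Name.size() || Text[Name.size()] != ':')
    return false;
  Value = std::strtoll(Text.c_str() + Name.size() + 1, nullptr, 0);
  return true;
}

// Parses a named bit: "glc" sets it, "noglc" clears it.
bool parseNamedBitSketch(const std::string &Tok, const std::string &Name,
                         int64_t &Bit) {
  if (Tok == Name)        { Bit = 1; return true; }
  if (Tok == "no" + Name) { Bit = 0; return true; }
  return false;
}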
+ if (getLexer().isNot(AsmToken::EndOfStatement)) { + switch(getLexer().getKind()) { + case AsmToken::Identifier: { + StringRef Tok = Parser.getTok().getString(); + if (Tok == Name) { + Bit = 1; + Parser.Lex(); + } else if (Tok.startswith("no") && Tok.endswith(Name)) { + Bit = 0; + Parser.Lex(); + } else { + return MatchOperand_NoMatch; + } + break; + } + default: + return MatchOperand_NoMatch; + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy)); + return MatchOperand_Success; +} + +static bool operandsHasOptionalOp(const OperandVector &Operands, + const OptionalOperand &OOp) { + for (unsigned i = 0; i < Operands.size(); i++) { + const AMDGPUOperand &ParsedOp = ((const AMDGPUOperand &)*Operands[i]); + if ((ParsedOp.isImm() && ParsedOp.getImmTy() == OOp.Type) || + (ParsedOp.isToken() && ParsedOp.getToken() == OOp.Name)) + return true; + + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps, + OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + for (const OptionalOperand &Op : OptionalOps) { + if (operandsHasOptionalOp(Operands, Op)) + continue; + AMDGPUAsmParser::OperandMatchResultTy Res; + int64_t Value; + if (Op.IsBit) { + Res = parseNamedBit(Op.Name, Operands, Op.Type); + if (Res == MatchOperand_NoMatch) + continue; + return Res; + } + + Res = parseIntWithPrefix(Op.Name, Value, Op.Default); + + if (Res == MatchOperand_NoMatch) + continue; + + if (Res != MatchOperand_Success) + return Res; + + if (Op.ConvertResult && !Op.ConvertResult(Value)) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type)); + return MatchOperand_Success; + } + return MatchOperand_NoMatch; +} + +//===----------------------------------------------------------------------===// +// ds +//===----------------------------------------------------------------------===// + +static const OptionalOperand DSOptionalOps [] = { + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} +}; + +static const OptionalOperand DSOptionalOpsOff01 [] = { + {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr}, + {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) { + return parseOptionalOps(DSOptionalOps, Operands); +} +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) { + return parseOptionalOps(DSOptionalOpsOff01, Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + AMDGPUAsmParser::OperandMatchResultTy Res = + parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); + if (Res == MatchOperand_NoMatch) { + Operands.push_back(AMDGPUOperand::CreateImm(0, S, + AMDGPUOperand::ImmTyOffset)); + Res = MatchOperand_Success; + } + return Res; +} + +bool AMDGPUOperand::isDSOffset() const { + return isImm() && isUInt<16>(getImm()); +} + +bool AMDGPUOperand::isDSOffset01() const { + return isImm() && isUInt<8>(getImm()); +} + +void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, + const OperandVector &Operands) { + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0]; + unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1]; + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + + ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0 + ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1 + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0 } +void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) { + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + bool GDSOnly = false; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + if (Op.isToken() && Op.getToken() == "gds") { + GDSOnly = true; + continue; + } + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset + + if (!GDSOnly) { + unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS]; + ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds + } + Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0 +} + + //===----------------------------------------------------------------------===// // s_waitcnt //===----------------------------------------------------------------------===// @@ -283,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { // expcnt [6:4] // lgkmcnt [10:8] int64_t CntVal = 0x77f; + SMLoc S = Parser.getTok().getLoc(); switch(getLexer().getKind()) { default: return MatchOperand_ParseFail; @@ -299,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { } while(getLexer().isNot(AsmToken::EndOfStatement)); break; } - Operands.push_back(AMDGPUOperand::CreateImm(CntVal)); + Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S)); return MatchOperand_Success; } @@ -307,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const { return isImm(); } +//===----------------------------------------------------------------------===// +// sopp branch targets +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + + switch (getLexer().getKind()) { + default: return MatchOperand_ParseFail; + case AsmToken::Integer: { + int64_t Imm; + if (getParser().parseAbsoluteExpression(Imm)) + return MatchOperand_ParseFail; + Operands.push_back(AMDGPUOperand::CreateImm(Imm, S)); + return MatchOperand_Success; + } + + case AsmToken::Identifier: + Operands.push_back(AMDGPUOperand::CreateExpr( + MCSymbolRefExpr::Create(getContext().GetOrCreateSymbol( + Parser.getTok().getString()), getContext()), S)); + Parser.Lex(); + return MatchOperand_Success; + } +} + +//===----------------------------------------------------------------------===// +// mubuf +//===----------------------------------------------------------------------===// + +static const OptionalOperand MubufOptionalOps [] = { + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"glc", 
AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, + {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, + {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} +}; + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) { + return parseOptionalOps(MubufOptionalOps, Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseOffset(OperandVector &Operands) { + return parseIntWithPrefix("offset", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseGLC(OperandVector &Operands) { + return parseNamedBit("glc", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseSLC(OperandVector &Operands) { + return parseNamedBit("slc", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseTFE(OperandVector &Operands) { + return parseNamedBit("tfe", Operands); +} + +bool AMDGPUOperand::isMubufOffset() const { + return isImm() && isUInt<12>(getImm()); +} + +void AMDGPUAsmParser::cvtMubuf(MCInst &Inst, + const OperandVector &Operands) { + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + for (unsigned i = 1, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + // Add the register arguments + if (Op.isReg()) { + Op.addRegOperands(Inst, 1); + continue; + } + + // Handle the case where soffset is an immediate + if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { + Op.addImmOperands(Inst, 1); + continue; + } + + // Handle tokens like 'offen' which are sometimes hard-coded into the + // asm string. There are no MCInst operands for these. + if (Op.isToken()) { + continue; + } + assert(Op.isImm()); + + // Handle optional arguments + OptionalIdx[Op.getImmTy()] = i; + } + + assert(OptionalIdx.size() == 4); + + unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset]; + unsigned GLCIdx = OptionalIdx[AMDGPUOperand::ImmTyGLC]; + unsigned SLCIdx = OptionalIdx[AMDGPUOperand::ImmTySLC]; + unsigned TFEIdx = OptionalIdx[AMDGPUOperand::ImmTyTFE]; + + ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[GLCIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[SLCIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[TFEIdx]).addImmOperands(Inst, 1); +} + +//===----------------------------------------------------------------------===// +// mimg +//===----------------------------------------------------------------------===// + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseDMask(OperandVector &Operands) { + return parseIntWithPrefix("dmask", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseUNorm(OperandVector &Operands) { + return parseNamedBit("unorm", Operands); +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseR128(OperandVector &Operands) { + return parseNamedBit("r128", Operands); +} + +//===----------------------------------------------------------------------===// +// vop3 +//===----------------------------------------------------------------------===// + +static bool ConvertOmodMul(int64_t &Mul) { + if (Mul != 1 && Mul != 2 && Mul != 4) + return false; + + Mul >>= 1; + return true; +} + +static bool ConvertOmodDiv(int64_t &Div) { + if (Div == 1) { + Div = 0; + return true; + } + + if (Div == 2) { + Div = 3; + return true; + } + + return false; +} + +static const OptionalOperand VOP3OptionalOps [] = { + {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr}, + {"mul", AMDGPUOperand::ImmTyOMod, false, 1, 
ConvertOmodMul}, + {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv}, +}; + +static bool isVOP3(OperandVector &Operands) { + if (operandsHaveModifiers(Operands)) + return true; + + AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]); + + if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID)) + return true; + + if (Operands.size() >= 5) + return true; + + if (Operands.size() > 3) { + AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]); + if (Src1Op.getReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) || + Src1Op.isRegClass(AMDGPU::SReg_64RegClassID))) + return true; + } + return false; +} + +AMDGPUAsmParser::OperandMatchResultTy +AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { + + // The value returned by this function may change after parsing + // an operand so store the original value here. + bool HasModifiers = operandsHaveModifiers(Operands); + + bool IsVOP3 = isVOP3(Operands); + if (HasModifiers || IsVOP3 || + getLexer().isNot(AsmToken::EndOfStatement) || + getForcedEncodingSize() == 64) { + + AMDGPUAsmParser::OperandMatchResultTy Res = + parseOptionalOps(VOP3OptionalOps, Operands); + + if (!HasModifiers && Res == MatchOperand_Success) { + // We have added a modifier operation, so we need to make sure all + // previous register operands have modifiers + for (unsigned i = 2, e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); + if (Op.isReg()) + Op.setModifiers(0); + } + } + return Res; + } + return MatchOperand_NoMatch; +} + +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { + ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); + unsigned i = 2; + + std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx; + + if (operandsHaveModifiers(Operands)) { + for (unsigned e = Operands.size(); i != e; ++i) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); + + if (Op.isRegWithInputMods()) { + ((AMDGPUOperand &)*Operands[i]).addRegWithInputModsOperands(Inst, 2); + continue; + } + OptionalIdx[Op.getImmTy()] = i; + } + + unsigned ClampIdx = OptionalIdx[AMDGPUOperand::ImmTyClamp]; + unsigned OModIdx = OptionalIdx[AMDGPUOperand::ImmTyOMod]; + + ((AMDGPUOperand &)*Operands[ClampIdx]).addImmOperands(Inst, 1); + ((AMDGPUOperand &)*Operands[OModIdx]).addImmOperands(Inst, 1); + } else { + for (unsigned e = Operands.size(); i != e; ++i) + ((AMDGPUOperand &)*Operands[i]).addRegOrImmOperands(Inst, 1); + } +} + /// Force static initialization. 
extern "C" void LLVMInitializeR600AsmParser() { RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index d62fd3f..279c3eb 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -20,7 +20,7 @@ using namespace llvm; void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { OS.flush(); printInstruction(MI, OS); @@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << " offset0:"; - printU8ImmDecOperand(MI, OpNo, O); + if (MI->getOperand(OpNo).getImm()) { + O << " offset0:"; + printU8ImmDecOperand(MI, OpNo, O); + } } void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << " offset1:"; - printU8ImmDecOperand(MI, OpNo, O); + if (MI->getOperand(OpNo).getImm()) { + O << " offset1:"; + printU8ImmDecOperand(MI, OpNo, O); + } } void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo, @@ -123,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, O << " tfe"; } -void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { +void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O, + const MCRegisterInfo &MRI) { switch (reg) { case AMDGPU::VCC: O << "vcc"; @@ -293,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, break; default: - printRegOperand(Op.getReg(), O); + printRegOperand(Op.getReg(), O, MRI); break; } } else if (Op.isImm()) { diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 5289718..14fb511 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -29,7 +29,10 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + static void printRegOperand(unsigned RegNo, raw_ostream &O, + const MCRegisterInfo &MRI); private: void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index d0c634f..f33e692 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -24,7 +24,7 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: - AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } + AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {} void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override { //XXX: Implement if necessary. 
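The raw_ostream to raw_pwrite_stream migration that starts here and continues through the object-writer creation functions below is not cosmetic: an object writer has to patch bytes it has already emitted, e.g. a size or offset field that is only known once a section body has been streamed out, and raw_pwrite_stream adds exactly that positioned-write primitive on top of raw_ostream. A minimal sketch of the pattern, using only the public tell()/pwrite() interface; the function and the placeholder layout are invented for illustration, and the raw byte copy ignores endianness:

#include "llvm/Support/raw_ostream.h"
#include <cstdint>

// Reserve a 4-byte size field, stream the payload, then patch the real
// size back into the reserved slot. Only raw_pwrite_stream can do the
// final step; a plain raw_ostream is forward-only.
static void emitWithPatchedSize(llvm::raw_pwrite_stream &OS) {
  uint64_t SizeFieldOffset = OS.tell();
  uint32_t Placeholder = 0;
  OS.write(reinterpret_cast<const char *>(&Placeholder), sizeof(Placeholder));

  uint64_t BodyBegin = OS.tell();
  OS << "section payload...";                      // emit the body
  uint32_t Size = uint32_t(OS.tell() - BodyBegin);

  // Positioned write over bytes that were already emitted.
  OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size),
            SizeFieldOffset);
}

That need to revisit already-written output is presumably why the MCObjectWriter constructor above and the createObjectWriter/createAMDGPUELFObjectWriter signatures below all tighten their parameter type in this patch.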
@@ -131,7 +131,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { public: ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createAMDGPUELFObjectWriter(OS); } }; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 5fb94d5..59f45ff 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -33,7 +33,7 @@ protected: AMDGPUELFObjectWriter::AMDGPUELFObjectWriter() : MCELFObjectTargetWriter(false, 0, 0, false) { } -MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) { +MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) { MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(); return createELFObjectWriter(MOTW, OS, true); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index fb2deef..7b280a4 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -64,12 +64,11 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T, +static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new AMDGPUInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h index 23f0196..9a7548e 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -28,6 +28,7 @@ class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheAMDGPUTarget; @@ -44,7 +45,7 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS); +MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS); } // End llvm namespace #define GET_REGINFO_ENUM diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 760aa37..24f2b6d 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -49,7 +49,7 @@ public: MCContext &ctx) : MCII(mcii), MRI(mri), Ctx(ctx) { } - ~SIMCCodeEmitter() { } + ~SIMCCodeEmitter() override {} /// \brief Encode the instruction and write it to the OS. 
void EncodeInstruction(const MCInst &MI, raw_ostream &OS, diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a34e2dc..b6b7067 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1811,7 +1811,7 @@ SDValue Swz[4], SelectionDAG &DAG) const { BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); } @@ -1819,7 +1819,7 @@ SDValue Swz[4], SelectionDAG &DAG) const { SwizzleRemap.clear(); BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); for (unsigned i = 0; i < 4; i++) { - unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue(); + unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32); } diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 291fb04..7126c82 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -683,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; // TODO: Do these actually match the regular fmin/fmax behavior? def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>; +// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx +// DX10 min/max returns the other operand if one operand is NaN, +// which matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic +def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>; +def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>; // For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, // so some of the instruction names don't match the asm string.
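The dyn_cast-to-cast changes in R600ISelLowering.cpp above, and the matching ones in R600TextureIntrinsicsReplacer.cpp below, fix a latent bug pattern rather than style: dyn_cast<T> returns null when the value is not actually a T, so dereferencing its result unchecked can crash with no diagnostic, while cast<T> documents the type invariant and asserts it in asserts-enabled builds. A minimal sketch of the idiom, with a hypothetical two-level hierarchy standing in for SDNode/ConstantSDNode:

#include "llvm/Support/Casting.h"
#include <cstdint>

struct Node {
  enum Kind { ConstantKind, OtherKind };
  Kind K;
  explicit Node(Kind K) : K(K) {}
};

struct ConstantNode : Node {
  uint64_t Value;
  explicit ConstantNode(uint64_t V) : Node(ConstantKind), Value(V) {}
  static bool classof(const Node *N) { return N->K == ConstantKind; }
};

uint64_t getConstantValue(Node *N) {
  // Latent bug: dyn_cast<> yields nullptr on a type mismatch, and an
  // unchecked dereference turns that into a silent crash:
  //   return llvm::dyn_cast<ConstantNode>(N)->Value;

  // cast<> states the invariant and asserts it in +Asserts builds:
  return llvm::cast<ConstantNode>(N)->Value;
}

// When the type is genuinely uncertain, test before use instead:
//   if (auto *C = llvm::dyn_cast<ConstantNode>(N)) { /* use C->Value */ }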
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index 419ec8b..2fc7b02 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer : Value *SamplerId = I.getArgOperand(2); unsigned TextureType = - dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); unsigned SrcSelect[4] = { 0, 1, 2, 3 }; unsigned CT[4] = {1, 1, 1, 1}; @@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer : Value *SamplerId = I.getArgOperand(5); unsigned TextureType = - dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); + cast<ConstantInt>(I.getArgOperand(6))->getZExtValue(); unsigned SrcSelect[4] = { 0, 1, 2, 3 }; unsigned CT[4] = {1, 1, 1, 1}; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index 79f6532..d39ab3f 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - Value *handleLoopCondition(Value *Cond, PHINode *Broken); + Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L); void handleLoop(BranchInst *Term); @@ -207,7 +207,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { } /// \brief Recursively handle the condition leading to a loop -Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) { +Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken, + llvm::Loop *L) { if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { BasicBlock *Parent = Phi->getParent(); PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); @@ -223,7 +224,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } Phi->setIncomingValue(i, BoolFalse); - Value *PhiArg = handleLoopCondition(Incoming, Broken); + Value *PhiArg = handleLoopCondition(Incoming, Broken, L); NewPhi->addIncoming(PhiArg, From); } @@ -253,7 +254,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { BasicBlock *Parent = Inst->getParent(); - TerminatorInst *Insert = Parent->getTerminator(); + Instruction *Insert; + if (L->contains(Inst)) { + Insert = Parent->getTerminator(); + } else { + Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime(); + } Value *Args[] = { Cond, Broken }; return CallInst::Create(IfBreak, Args, "", Insert); @@ -265,14 +271,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) /// \brief Handle a back edge (loop) void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { + BasicBlock *BB = Term->getParent(); + llvm::Loop *L = LI->getLoopFor(BB); BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); - Value *Arg = handleLoopCondition(Cond, Broken); + Value *Arg = handleLoopCondition(Cond, Broken, L); - BasicBlock *BB = Term->getParent(); for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); PI != PE; ++PI) { diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index bd0c3c2..43507d8 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -76,8 +76,6 @@ 
SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::FSIN, MVT::f32, Custom); setOperationAction(ISD::FCOS, MVT::f32, Custom); - setOperationAction(ISD::FMINNUM, MVT::f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -2089,3 +2087,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()), cast<RegisterSDNode>(VReg)->getReg(), VT); } + +//===----------------------------------------------------------------------===// +// SI Inline Assembly Support +//===----------------------------------------------------------------------===// + +std::pair<unsigned, const TargetRegisterClass *> +SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, + MVT VT) const { + if (Constraint == "r") { + switch(VT.SimpleTy) { + default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); + case MVT::i64: + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + case MVT::i32: + return std::make_pair(0U, &AMDGPU::SGPR_32RegClass); + } + } + + if (Constraint.size() > 1) { + const TargetRegisterClass *RC = nullptr; + if (Constraint[1] == 'v') { + RC = &AMDGPU::VGPR_32RegClass; + } else if (Constraint[1] == 's') { + RC = &AMDGPU::SGPR_32RegClass; + } + + if (RC) { + unsigned Idx = std::atoi(Constraint.substr(2).c_str()); + if (Idx < RC->getNumRegs()) + return std::make_pair(RC->getRegister(Idx), RC); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 92f5847..a6bc7c6 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -113,6 +113,10 @@ public: MachineSDNode *buildScratchRSRC(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const; + + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const override; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 4167590..bc693c3 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : let AddedComplexity = -1000; let VOP3 = 1; + let VALU = 1; + + let AsmMatchConverter = "cvtVOP3"; + let isCodeGenOnly = 0; + int Size = 8; } @@ -181,6 +186,19 @@ class SOPKe <bits<5> op> : Enc32 { let Inst{31-28} = 0xb; //encoding } +class SOPK64e <bits<5> op> : Enc64 { + bits <7> sdst = 0; + bits <16> simm16; + bits <32> imm; + + let Inst{15-0} = simm16; + let Inst{22-16} = sdst; + let Inst{27-23} = op; + let Inst{31-28} = 0xb; + + let Inst{63-32} = imm; +} + class SOPPe <bits<7> op> : Enc32 { bits <16> simm16; @@ -208,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 0; let SALU = 1; let SOP1 = 1; } @@ -218,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> : let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; + let isCodeGenOnly = 0; let SALU = 1; let SOP2 = 1; @@ -233,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let SALU = 1; let SOPC = 1; + let isCodeGenOnly = 0; let 
UseNamedOperandTable = 1; } @@ -550,10 +571,14 @@ let Uses = [EXEC] in { class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> : VOP1Common <outs, ins, asm, pattern>, - VOP1e<op>; + VOP1e<op> { + let isCodeGenOnly = 0; +} class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : - VOP2Common <outs, ins, asm, pattern>, VOP2e<op>; + VOP2Common <outs, ins, asm, pattern>, VOP2e<op> { + let isCodeGenOnly = 0; +} class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : VOPCCommon <ins, asm, pattern>, VOPCe <op>; @@ -586,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> : let mayStore = 1; let hasSideEffects = 0; + let AsmMatchConverter = "cvtDS"; let SchedRW = [WriteLDS]; } @@ -598,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; let UseNamedOperandTable = 1; + let AsmMatchConverter = "cvtMubuf"; let SchedRW = [WriteVMEM]; } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d603ecb..076a0ce 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -6,6 +6,15 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +def isSICI : Predicate< + "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" + "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" +>, AssemblerPredicate<"FeatureGCN1Encoding">; +def isCI : Predicate<"Subtarget->getGeneration() " + ">= AMDGPUSubtarget::SEA_ISLANDS">; +def isVI : Predicate < + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureGCN3Encoding">; class vop { field bits<9> SI3; @@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> { let MIOperandInfo = (ops i32:$ptr, i32imm:$index); } +def SoppBrTarget : AsmOperandClass { + let Name = "SoppBrTarget"; + let ParserMethod = "parseSOppBrTarget"; +} + def sopp_brtarget : Operand<OtherVT> { let EncoderMethod = "getSOPPBrEncoding"; let OperandType = "OPERAND_PCREL"; + let ParserMatchClass = SoppBrTarget; } include "SIInstrFormats.td" include "VIInstrFormats.td" +def MubufOffsetMatchClass : AsmOperandClass { + let Name = "MubufOffset"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +class DSOffsetBaseMatchClass <string parser> : AsmOperandClass { + let Name = "DSOffset"#parser; + let ParserMethod = parser; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isDSOffset"; +} + +def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">; +def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">; + +def DSOffset01MatchClass : AsmOperandClass { + let Name = "DSOffset1"; + let ParserMethod = "parseDSOff01OptionalOps"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isDSOffset01"; +} + +class GDSBaseMatchClass <string parser> : AsmOperandClass { + let Name = "GDS"#parser; + let PredicateMethod = "isImm"; + let ParserMethod = parser; + let RenderMethod = "addImmOperands"; +} + +def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">; +def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">; + +def GLCMatchClass : AsmOperandClass { + let Name = "GLC"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def SLCMatchClass : AsmOperandClass { + let Name = "SLC"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; 
+ let RenderMethod = "addImmOperands"; +} + +def TFEMatchClass : AsmOperandClass { + let Name = "TFE"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseMubufOptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def OModMatchClass : AsmOperandClass { + let Name = "OMod"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseVOP3OptionalOps"; + let RenderMethod = "addImmOperands"; +} + +def ClampMatchClass : AsmOperandClass { + let Name = "Clamp"; + let PredicateMethod = "isImm"; + let ParserMethod = "parseVOP3OptionalOps"; + let RenderMethod = "addImmOperands"; +} + let OperandType = "OPERAND_IMMEDIATE" in { def offen : Operand<i1> { @@ -254,35 +337,52 @@ def addr64 : Operand<i1> { } def mbuf_offset : Operand<i16> { let PrintMethod = "printMBUFOffset"; + let ParserMatchClass = MubufOffsetMatchClass; } -def ds_offset : Operand<i16> { +class ds_offset_base <AsmOperandClass mc> : Operand<i16> { let PrintMethod = "printDSOffset"; + let ParserMatchClass = mc; } +def ds_offset : ds_offset_base <DSOffsetMatchClass>; +def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>; + def ds_offset0 : Operand<i8> { let PrintMethod = "printDSOffset0"; + let ParserMatchClass = DSOffset01MatchClass; } def ds_offset1 : Operand<i8> { let PrintMethod = "printDSOffset1"; + let ParserMatchClass = DSOffset01MatchClass; } -def gds : Operand <i1> { +class gds_base <AsmOperandClass mc> : Operand <i1> { let PrintMethod = "printGDS"; + let ParserMatchClass = mc; } +def gds : gds_base <GDSMatchClass>; + +def gds01 : gds_base <GDS01MatchClass>; + def glc : Operand <i1> { let PrintMethod = "printGLC"; + let ParserMatchClass = GLCMatchClass; } def slc : Operand <i1> { let PrintMethod = "printSLC"; + let ParserMatchClass = SLCMatchClass; } def tfe : Operand <i1> { let PrintMethod = "printTFE"; + let ParserMatchClass = TFEMatchClass; } def omod : Operand <i32> { let PrintMethod = "printOModSI"; + let ParserMatchClass = OModMatchClass; } def ClampMod : Operand <i1> { let PrintMethod = "printClampSI"; + let ParserMatchClass = ClampMatchClass; } } // End OperandType = "OPERAND_IMMEDIATE" @@ -392,12 +492,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let isCodeGenOnly = 0; + let AssemblerPredicates = [isSICI]; +} class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> : SOP1 <outs, ins, asm, []>, SOP1e <op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let isCodeGenOnly = 0; + let AssemblerPredicates = [isVI]; +} multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm, list<dag> pattern> { @@ -473,12 +579,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> : class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> : SOP2<outs, ins, asm, []>, SOP2e<op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> { def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst), @@ -540,12 +650,28 @@ class SOPK_Pseudo <string opName, 
dag outs, dag ins, list<dag> pattern> : class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.SI>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + let isCodeGenOnly = 0; +} class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> : SOPK <outs, ins, asm, []>, SOPKe <op.VI>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + let isCodeGenOnly = 0; +} + +multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm, + string asm = opName#opAsm> { + def "" : SOPK_Pseudo <opName, outs, ins, []>; + + def _si : SOPK_Real_si <op, opName, outs, ins, asm>; + + def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>; + +} multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> { def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0), @@ -562,13 +688,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> { def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst), (ins SReg_32:$src0, u16imm:$src1), pattern>; - def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; + let DisableEncoding = "$dst" in { + def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst), + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">; - def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), - (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">; + def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst), + (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">; + } } +multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m < + op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16), + " $sdst, $simm16" +>; + +multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins, + string argAsm, string asm = opName#argAsm> { + + def "" : SOPK_Pseudo <opName, outs, ins, []>; + + def _si : SOPK <outs, ins, asm, []>, + SOPK64e <op.SI>, + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + let isCodeGenOnly = 0; + } + + def _vi : SOPK <outs, ins, asm, []>, + SOPK64e <op.VI>, + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + let isCodeGenOnly = 0; + } +} //===----------------------------------------------------------------------===// // SMRD classes //===----------------------------------------------------------------------===// @@ -584,13 +736,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMRDe <op, imm>, - SIMCInstr<opName, SISubtarget.SI>; + SIMCInstr<opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins, string asm> : SMRD <outs, ins, asm, []>, SMEMe_vi <op, imm>, - SIMCInstr<opName, SISubtarget.VI>; + SIMCInstr<opName, SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins, string asm, list<dag> pattern> { @@ -629,8 +785,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass, def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> { let PrintMethod = "printOperandAndMods"; } + +def InputModsMatchClass : AsmOperandClass { + let Name = "RegWithInputMods"; +} + def InputModsNoDefault : Operand <i32> { let PrintMethod = "printOperandAndMods"; + let ParserMatchClass = 
InputModsMatchClass; } class getNumSrcArgs<ValueType Src1, ValueType Src2> { @@ -838,7 +1000,8 @@ class AtomicNoRet <string noRetOp, bit isRet> { class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP1Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr <opName#"_e32", SISubtarget.NONE> { + SIMCInstr <opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; @@ -873,18 +1036,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern, class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP2Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e32", SISubtarget.NONE> { + SIMCInstr<opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> : VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.SI>; + SIMCInstr <opName#"_e32", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> : - VOP2 <op.SI, outs, ins, opName#asm, []>, - SIMCInstr <opName#"_e32", SISubtarget.VI>; + VOP2 <op.VI, outs, ins, opName#asm, []>, + SIMCInstr <opName#"_e32", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern, string opName, string revOp> { @@ -930,7 +1098,8 @@ class VOP3DisableModFields <bit HasSrc0Mods, class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOP3Common <outs, ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e64", SISubtarget.NONE> { + SIMCInstr<opName#"_e64", SISubtarget.NONE>, + MnemonicAlias<opName#"_e64", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -938,22 +1107,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3e <op>, - SIMCInstr<opName#"_e64", SISubtarget.SI>; + SIMCInstr<opName#"_e64", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3e_vi <op>, - SIMCInstr <opName#"_e64", SISubtarget.VI>; + SIMCInstr <opName#"_e64", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3be <op>, - SIMCInstr<opName#"_e64", SISubtarget.SI>; + SIMCInstr<opName#"_e64", SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; +} class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> : VOP3Common <outs, ins, asm, []>, VOP3be_vi <op>, - SIMCInstr <opName#"_e64", SISubtarget.VI>; + SIMCInstr <opName#"_e64", SISubtarget.VI> { + let AssemblerPredicates = [isVI]; +} multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern, string opName, int NumSrcArgs, bit HasMods = 1> { @@ -1095,12 +1272,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins, } def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>, - SIMCInstr <opName, SISubtarget.SI>; + SIMCInstr <opName, SISubtarget.SI> { + let AssemblerPredicates = [isSICI]; + } def _vi : VOP3Common <outs, ins, asm, []>, VOP3e_vi <op.VI3>, VOP3DisableFields <1, 0, 0>, - SIMCInstr <opName, SISubtarget.VI>; + SIMCInstr <opName, 
SISubtarget.VI> { + let AssemblerPredicates = [isVI]; + } } multiclass VOP1_Helper <vop1 op, string opName, dag outs, @@ -1253,7 +1434,8 @@ let isCodeGenOnly = 0 in { class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> : VOPCCommon <ins, "", pattern>, VOP <opName>, - SIMCInstr<opName#"_e32", SISubtarget.NONE> { + SIMCInstr<opName#"_e32", SISubtarget.NONE>, + MnemonicAlias<opName#"_e32", opName> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -1504,7 +1686,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> : DS <outs, ins, asm, []>, DSe <op>, - SIMCInstr <opName, SISubtarget.SI>; + SIMCInstr <opName, SISubtarget.SI> { + let isCodeGenOnly = 0; +} class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> : DS <outs, ins, asm, []>, @@ -1518,6 +1702,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm bits<16> offset; let offset0 = offset{7-0}; let offset1 = offset{15-8}; + let isCodeGenOnly = 0; } class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> : @@ -1545,12 +1730,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1, - gds:$gds, M0Reg:$m0), + gds01:$gds, M0Reg:$m0), string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; - let data0 = 0, data1 = 0 in { + let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in { def _si : DS_Real_si <op, opName, outs, ins, asm>; def _vi : DS_Real_vi <op, opName, outs, ins, asm>; } @@ -1574,12 +1759,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc, multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc, dag outs = (outs), dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1, - ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0), + ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0), string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; - let vdst = 0 in { + let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in { def _si : DS_Real_si <op, opName, outs, ins, asm>; def _vi : DS_Real_vi <op, opName, outs, ins, asm>; } @@ -1653,7 +1838,7 @@ multiclass DS_0A_RET <bits<8> op, string opName, multiclass DS_1A_RET_GDS <bits<8> op, string opName, dag outs = (outs VGPR_32:$vdst), - dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0), + dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0), string asm = opName#" $vdst, $addr"#"$offset gds"> { def "" : DS_Pseudo <opName, outs, ins, []>; @@ -1762,6 +1947,20 @@ class mubuf <bits<7> si, bits<7> vi = si> { field bits<7> VI = vi; } +let isCodeGenOnly = 0 in { + +class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { + let lds = 0; +} + +} // End let isCodeGenOnly = 0 + +class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : + MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> { + let lds = 0; +} + class MUBUFAddr64Table <bit is_addr64, string suffix = ""> { bit IsAddr64 = is_addr64; string OpName = NAME # suffix; @@ -1805,7 +2004,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm, def "" : MUBUF_Pseudo 
<opName, outs, ins, pattern>, MUBUFAddr64Table <0>; - let addr64 = 0 in { + let addr64 = 0, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -1818,7 +2017,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs, def "" : MUBUF_Pseudo <opName, outs, ins, pattern>, MUBUFAddr64Table <1>; - let addr64 = 1 in { + let addr64 = 1, isCodeGenOnly = 0 in { def _si : MUBUF_Real_si <op, opName, outs, ins, asm>; } @@ -1826,11 +2025,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs, // for VI appropriately. } -class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : - MUBUF <outs, ins, asm, pattern>, MUBUFe <op> { - let lds = 0; -} - multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins, string asm, list<dag> pattern, bit is_return> { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 95b2470..91e8c8c 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -27,18 +27,10 @@ def SendMsgImm : Operand<i32> { } def isGCN : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">; + ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; -def isSICI : Predicate< - "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||" - "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" ->; -def isCI : Predicate<"Subtarget->getGeneration() " - ">= AMDGPUSubtarget::SEA_ISLANDS">; -def isVI : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS" ->; def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">; @@ -242,9 +234,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32", >; } // End Defs = [SCC] -defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>; let Uses = [SCC] in { + defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>; defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>; } // End Uses = [SCC] @@ -387,6 +379,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", >; */ +defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>; defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>; defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>; defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>; @@ -400,18 +393,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>; defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>; } // End isCompare = 1 -let isCommutable = 1 in { - let Defs = [SCC], isCommutable = 1 in { - defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>; - } - defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>; +let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", + Constraints = "$sdst = $src0" in { + defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>; + defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>; } -//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>; +defm S_CBRANCH_I_FORK : SOPK_m < + sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs), + (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16" +>; defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>; -defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>; -defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", 
[]>; -//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>; +defm S_SETREG_B32 : SOPK_m < + sopk<0x13, 0x12>, "s_setreg_b32", (outs), + (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16" +>; +// FIXME: Not on SI? +//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>; +defm S_SETREG_IMM32_B32 : SOPK_IMM32 < + sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs), + (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16" +>; //===----------------------------------------------------------------------===// // SOPP Instructions @@ -1630,7 +1632,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp >; - defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 7bb5dc2..f289014 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -66,7 +66,7 @@ foreach Index = 0-255 in { //===----------------------------------------------------------------------===// // SGPR 32-bit registers -def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "SGPR%u", 0, 101))>; // SGPR 64-bit registers @@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, (add (decimate (shl SGPR_32, 15), 4))]>; // VGPR 32-bit registers -def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add (sequence "VGPR%u", 0, 255))>; // VGPR 64-bit registers @@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7, // Register classes used as source and destination //===----------------------------------------------------------------------===// +class RegImmMatcher<string name> : AsmOperandClass { + let Name = name; + let RenderMethod = "addRegOrImmOperands"; +} + // Special register classes for predicates and the M0 register def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> { let CopyCost = -1; // Theoretically it is possible to read from SCC, @@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>; def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>; // Register class for all scalar registers (SGPRs + Special Registers) -def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32, +def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI) >; @@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> { // SSrc_* Operands with an SGPR or a 32-bit immediate //===----------------------------------------------------------------------===// -def SSrc_32 : RegImmOperand<SReg_32>; +def SSrc_32 : RegImmOperand<SReg_32> { + let ParserMatchClass = RegImmMatcher<"SSrc32">; +} -def SSrc_64 : RegImmOperand<SReg_64>; +def SSrc_64 : RegImmOperand<SReg_64> { + let ParserMatchClass = RegImmMatcher<"SSrc64">; +} //===----------------------------------------------------------------------===// // SCSrc_* Operands with an SGPR or a inline constant //===----------------------------------------------------------------------===// -def SCSrc_32 : RegInlineOperand<SReg_32>; +def SCSrc_32 : RegInlineOperand<SReg_32> { + let ParserMatchClass = RegImmMatcher<"SCSrc32">; +} 
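The RegImmMatcher class defined above is only the declarative half: for each operand that names it, tblgen emits matcher glue that, on a successful match, invokes the RenderMethod ("addRegOrImmOperands") on the parsed operand to append MCInst operands. A plausible shape for that hook, modeled on the addRegOperands/addImmOperands calls visible earlier in this patch; the ParsedOp stand-in and its fields are assumptions for illustration, not the literal AMDGPUOperand:

#include "llvm/MC/MCInst.h"
#include <cstdint>

// Minimal stand-in for the parser's operand class; the real one also
// carries modifiers, source locations, and an immediate-type tag.
struct ParsedOp {
  bool IsReg;
  unsigned Reg;
  int64_t Imm;
};

// Shape of an "addRegOrImmOperands" render method: emit exactly one
// MCInst operand, register or literal, whichever the parser saw.
static void addRegOrImmOperands(const ParsedOp &Op, llvm::MCInst &Inst) {
  if (Op.IsReg)
    Inst.addOperand(llvm::MCOperand::CreateReg(Op.Reg));
  else
    Inst.addOperand(llvm::MCOperand::CreateImm(Op.Imm));
}

That single render hook is what lets SSrc_32 and the related operands accept either an SGPR or a bare literal in the same operand slot.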
//===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate @@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>; def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; -def VSrc_32 : RegImmOperand<VS_32>; +def VSrc_32 : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc32">; +} -def VSrc_64 : RegImmOperand<VS_64>; +def VSrc_64 : RegisterOperand<VS_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_IMM32"; + let ParserMatchClass = RegImmMatcher<"VSrc64">; +} //===----------------------------------------------------------------------===// // VCSrc_* Operands with an SGPR, VGPR or an inline constant //===----------------------------------------------------------------------===// -def VCSrc_32 : RegInlineOperand<VS_32>; +def VCSrc_32 : RegisterOperand<VS_32> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc32">; +} -def VCSrc_64 : RegInlineOperand<VS_64>; +def VCSrc_64 : RegisterOperand<VS_64> { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"VCSrc64">; +} diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 27bbf4f..591ce85 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -104,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) { SmallVector <Type*, 8> Types; bool NeedToReplace = false; Function *F = I.getCalledFunction(); - std::string Name = F->getName().str(); + std::string Name = F->getName(); for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); if (Arg->getType() == v16i8) { diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 5975a51..b6eebb0 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -34,7 +34,7 @@ namespace Sparc { #define PRINT_ALIAS_INSTR #include "SparcGenAsmWriter.inc" -bool SparcInstPrinter::isV9() const { +bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & Sparc::FeatureV9) != 0; } @@ -44,15 +44,15 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const } void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) -{ - if (!printAliasInstr(MI, O) && !printSparcAliasInstr(MI, O)) - printInstruction(MI, O); + StringRef Annot, const MCSubtargetInfo &STI) { + if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O)) + printInstruction(MI, STI, O); printAnnotation(O, Annot); } -bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) -{ +bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { switch (MI->getOpcode()) { default: return false; case SP::JMPLrr: @@ -72,16 +72,16 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) case SP::O7: O << "\tretl"; return true; } } - O << "\tjmp "; printMemOperand(MI, 1, O); + O << "\tjmp "; printMemOperand(MI, 1, STI, O); return true; case SP::O7: // call $addr - O << "\tcall "; printMemOperand(MI, 1, O); + O << "\tcall "; printMemOperand(MI, 1, STI, O); 
return true; } } case SP::V9FCMPS: case SP::V9FCMPD: case SP::V9FCMPQ: case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: { - if (isV9() + if (isV9(STI) || (MI->getNumOperands() != 3) || (!MI->getOperand(0).isReg()) || (MI->getOperand(0).getReg() != SP::FCC0)) @@ -96,17 +96,17 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O) case SP::V9FCMPED: O << "\tfcmped "; break; case SP::V9FCMPEQ: O << "\tfcmpeq "; break; } - printOperand(MI, 1, O); + printOperand(MI, 1, STI, O); O << ", "; - printOperand(MI, 2, O); + printOperand(MI, 2, STI, O); return true; } } } void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { const MCOperand &MO = MI->getOperand (opNum); if (MO.isReg()) { @@ -124,14 +124,14 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum, } void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ - printOperand(MI, opNum, O); + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + printOperand(MI, opNum, STI, O); // If this is an ADD operand, emit it like normal operands. if (Modifier && !strcmp(Modifier, "arith")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum+1, STI, O); return; } const MCOperand &MO = MI->getOperand(opNum+1); @@ -143,12 +143,12 @@ void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum, O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum+1, STI, O); } void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { int CC = (int)MI->getOperand(opNum).getImm(); switch (MI->getOpcode()) { default: break; @@ -171,8 +171,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, } bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, - raw_ostream &O) -{ + const MCSubtargetInfo &STI, + raw_ostream &O) { llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h index c96d5ad..0b01b88 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h @@ -22,32 +22,36 @@ namespace llvm { class MCOperand; class SparcInstPrinter : public MCInstPrinter { - const MCSubtargetInfo &STI; public: - SparcInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &sti) - : MCInstPrinter(MAI, MII, MRI), STI(sti) {} + SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS); - bool isV9() const; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool isV9(const MCSubtargetInfo &STI) const; // Autogenerated by tblgen. 
- void printInstruction(const MCInst *MI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, raw_ostream &O); + void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, - unsigned PrintMethodIdx, raw_ostream &O); + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - void printOperand(const MCInst *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS, - const char *Modifier = nullptr); - void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS); - bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS); - + void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS, const char *Modifier = nullptr); + void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &OS); }; } // end namespace llvm diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index dcd81e3..4abb6b8 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -244,7 +244,7 @@ namespace { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType); return createSparcELFObjectWriter(OS, is64Bit(), OSABI); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index 5ba82f1..98ba7e6 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -26,7 +26,8 @@ namespace { Is64Bit ? 
ELF::EM_SPARCV9 : ELF::EM_SPARC, /*HasRelocationAddend*/ true) {} - virtual ~SparcELFObjectWriter() {} + ~SparcELFObjectWriter() override {} + protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -104,9 +105,8 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_SPARC_NONE; } -MCObjectWriter *llvm::createSparcELFObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint8_t OSABI) { +MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 598856f..b447ab3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -38,7 +38,7 @@ class SparcMCCodeEmitter : public MCCodeEmitter { public: SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {} - ~SparcMCCodeEmitter() {} + ~SparcMCCodeEmitter() override {} void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 630ed1b..7895404 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -134,13 +134,12 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, return new SparcTargetAsmStreamer(S, OS); } -static MCInstPrinter *createSparcMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - return new SparcInstPrinter(MAI, MII, MRI, STI); +static MCInstPrinter *createSparcMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new SparcInstPrinter(MAI, MII, MRI); } extern "C" void LLVMInitializeSparcTargetMC() { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index d2ec991..5f38b12 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -26,6 +26,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; +class raw_pwrite_stream; class raw_ostream; extern Target TheSparcTarget; @@ -38,8 +39,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSparcELFObjectWriter(raw_ostream &OS, - bool Is64Bit, +MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint8_t OSABI); } // End llvm namespace diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index 3159a46..c34122e 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -92,8 +92,15 @@ def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc, // Declare the target which we are implementing //===----------------------------------------------------------------------===// +def SparcAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; +} + def Sparc : Target { // Pull in Instruction Info: let InstructionSet = SparcInstrInfo; let AssemblyParsers = 
[SparcAsmParser]; + let AssemblyWriters = [SparcAsmWriter]; } diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 0439f9d..56290e2 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -58,7 +58,6 @@ namespace { void EmitFunctionBodyStart() override; void EmitInstruction(const MachineInstr *MI) override; - void EmitEndOfAsmFile(Module &M) override; static const char *getRegisterName(unsigned RegNo) { return SparcInstPrinter::getRegisterName(RegNo); @@ -442,23 +441,6 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Generate stubs for global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataSection()); - unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize); - } - } -} - // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index c8b0570..5b964af 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -727,7 +727,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, false, // isVolatile, - (Size <= 32), // AlwaysInline if size <= 32 + (Size <= 32), // AlwaysInline if size <= 32, + false, // isTailCall MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index a3a21d6..6818291 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -23,7 +23,7 @@ class SparcTargetMachine; class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: explicit SparcSelectionDAGInfo(const DataLayout &DL); - ~SparcSelectionDAGInfo(); + ~SparcSelectionDAGInfo() override; }; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 60a3912..336f037 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -29,6 +29,7 @@ add_llvm_target(SystemZCodeGen SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + SystemZTargetTransformInfo.cpp ) add_subdirectory(AsmParser) diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 23173bf..84400f8 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler { public: SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) {} - virtual ~SystemZDisassembler() {} + ~SystemZDisassembler() override {} DecodeStatus getInstruction(MCInst &instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, diff --git 
a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 996a492..cf1ee54 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -43,7 +43,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
 }
 
 void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
-                                   StringRef Annot) {
+                                   StringRef Annot,
+                                   const MCSubtargetInfo &STI) {
   printInstruction(MI, O);
   printAnnotation(O, Annot);
 }
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 732e5fa..6f56c7b 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -39,7 +39,8 @@ public:
   // Override MCInstPrinter.
   void printRegName(raw_ostream &O, unsigned RegNo) const override;
-  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
 
 private:
   // Print various types of operand.
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 542aaee..6f8431d 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
 type = Library
 name = SystemZCodeGen
 parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
 add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index b79b1d8..1c3887a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -62,7 +62,7 @@ public:
     llvm_unreachable("SystemZ does not have assembler relaxation");
   }
   bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
-  MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
     return createSystemZObjectWriter(OS, OSABI);
   }
 };
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 40dc48e..8dd70b9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -32,7 +32,7 @@ public:
     : MCII(mcii), Ctx(ctx) {
   }
 
-  ~SystemZMCCodeEmitter() {}
+  ~SystemZMCCodeEmitter() override {}
 
   // Override MCCodeEmitter.
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 2632518..ee1af02 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
 public:
   SystemZObjectWriter(uint8_t OSABI);
 
-  virtual ~SystemZObjectWriter();
+  ~SystemZObjectWriter() override;
 
 protected:
   // Override MCELFObjectTargetWriter.
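A change that recurs throughout this commit is createObjectWriter and the create*ObjectWriter factories taking raw_pwrite_stream instead of raw_ostream: an object writer has to go back and patch bytes it has already emitted (section sizes, offsets, relocation counts), and pwrite expresses exactly that. The following is only a minimal sketch, not code from the commit; it assumes the contemporaneous raw_fd_ostream constructor and raw_pwrite_stream::pwrite interface, and the file name and 4-byte size field are illustrative.

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
using namespace llvm;

int main() {
  std::error_code EC;
  // A file-backed raw_fd_ostream is a raw_pwrite_stream.
  raw_fd_ostream OS("demo.o", EC, sys::fs::F_None);
  if (EC)
    return 1;

  OS.write("\0\0\0\0", 4); // placeholder for a 4-byte size field
  OS << "payload";         // emit data whose size is only known afterwards

  // Back-patch the placeholder: the operation the ELF writers need, and
  // the reason these APIs now require raw_pwrite_stream.
  // (Raw host byte order; fine for a demonstration.)
  uint32_t Size = 7;
  OS.pwrite(reinterpret_cast<const char *>(&Size), sizeof(Size), 0);
  return 0;
}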
@@ -152,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index ffd05a9..ea56fb1 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -172,12 +172,11 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createSystemZMCInstPrinter(const Target &T, +static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new SystemZInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index 962c950..2b2647b 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -23,6 +23,7 @@ class MCRegisterInfo; class MCSubtargetInfo; class StringRef; class Target; +class raw_pwrite_stream; class raw_ostream; extern Target TheSystemZTarget; @@ -77,7 +78,7 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); -MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI); +MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // end namespace llvm // Defines symbolic names for SystemZ registers. diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index 5f17edb..b3a7310 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1; const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; const unsigned CCMASK_TM = CCMASK_ANY; +// Condition-code mask assignments for TRANSACTION_BEGIN. +const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; +const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; +const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; +const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; +const unsigned CCMASK_TBEGIN = CCMASK_ANY; + +// Condition-code mask assignments for TRANSACTION_END. +const unsigned CCMASK_TEND_TX = CCMASK_0; +const unsigned CCMASK_TEND_NOTX = CCMASK_2; +const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; + // The position of the low CC bit in an IPM result. const unsigned IPM_CC = 28; diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 18e37e3..2524733 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -255,29 +255,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (Triple(TM.getTargetTriple()).isOSBinFormatELF()) { - auto &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. 
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0)); - } - Stubs.clear(); - } - } -} - // Force static initialization. extern "C" void LLVMInitializeSystemZAsmPrinter() { RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget); diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h index a4d5b78..7f6e823 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.h +++ b/lib/Target/SystemZ/SystemZAsmPrinter.h @@ -38,7 +38,6 @@ public: bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; - void EmitEndOfAsmFile(Module &M) override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index a52aa25..1a58b53 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -898,6 +898,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } unsigned Opcode = SystemZ::RISBG; + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; EVT OpcodeVT = MVT::i64; if (VT == MVT::i32 && Subtarget->hasHighWord()) { Opcode = SystemZ::RISBMux; @@ -945,9 +948,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. - if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) + if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { Opcode = SystemZ::RISBG; - + // Prefer RISBGN if available, since it does not clobber CC. + if (Subtarget->hasMiscellaneousExtensions()) + Opcode = SystemZ::RISBGN; + } + EVT VT = N->getValueType(0); SDValue Ops[5] = { convertTo(SDLoc(N), MVT::i64, Op0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 0ca8bcd..21882cb 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/Intrinsics.h" #include <cctype> using namespace llvm; @@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + // Use POPCNT on z196 and above. + if (Subtarget.hasPopulationCount()) + setOperationAction(ISD::CTPOP, VT, Custom); + else + setOperationAction(ISD::CTPOP, VT, Expand); + // No special instructions for these. - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -299,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::SIGN_EXTEND); + // Handle intrinsics. 
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; @@ -342,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isZero() || Imm.isNegZero(); } +bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // We can use CGFI or CLGFI. + return isInt<32>(Imm) || isUInt<32>(Imm); +} + +bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { + // We can use ALGFI or SLGFI. + return isUInt<32>(Imm) || isUInt<32>(-Imm); +} + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, @@ -1016,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); } +// Return true if Op is an intrinsic node with chain that returns the CC value +// as its only (other) argument. Provide the associated SystemZISD opcode and +// the mask of valid CC values if so. +static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, + unsigned &CCValid) { + unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + switch (Id) { + case Intrinsic::s390_tbegin: + Opcode = SystemZISD::TBEGIN; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tbegin_nofloat: + Opcode = SystemZISD::TBEGIN_NOFLOAT; + CCValid = SystemZ::CCMASK_TBEGIN; + return true; + + case Intrinsic::s390_tend: + Opcode = SystemZISD::TEND; + CCValid = SystemZ::CCMASK_TEND; + return true; + + default: + return false; + } +} + +// Emit an intrinsic with chain with a glued value instead of its CC result. +static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, + unsigned Opcode) { + // Copy all operands except the intrinsic ID. + unsigned NumOps = Op.getNumOperands(); + SmallVector<SDValue, 6> Ops; + Ops.reserve(NumOps - 1); + Ops.push_back(Op.getOperand(0)); + for (unsigned I = 2; I < NumOps; ++I) + Ops.push_back(Op.getOperand(I)); + + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); + SDValue OldChain = SDValue(Op.getNode(), 1); + SDValue NewChain = SDValue(Intr.getNode(), 0); + DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); + return Intr; +} + // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for @@ -1530,6 +1596,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } + if (!MaskVal) + return; // Check whether the combination of mask, comparison value and comparison // type are suitable. @@ -1571,9 +1639,53 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { C.CCMask = NewCCMask; } +// Return a Comparison that tests the condition-code result of intrinsic +// node Call against constant integer CC using comparison code Cond. +// Opcode is the opcode of the SystemZISD operation for the intrinsic +// and CCValid is the set of possible condition-code results. 
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, + SDValue Call, unsigned CCValid, uint64_t CC, + ISD::CondCode Cond) { + Comparison C(Call, SDValue()); + C.Opcode = Opcode; + C.CCValid = CCValid; + if (Cond == ISD::SETEQ) + // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. + C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; + else if (Cond == ISD::SETNE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; + else if (Cond == ISD::SETLT || Cond == ISD::SETULT) + // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; + else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; + else if (Cond == ISD::SETLE || Cond == ISD::SETULE) + // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), + // always true for CC>3. + C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; + else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) + // ...and the inverse of that. + C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; + else + llvm_unreachable("Unexpected integer comparison type"); + C.CCMask &= CCValid; + return C; +} + // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond) { + if (CmpOp1.getOpcode() == ISD::Constant) { + uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue(); + unsigned Opcode, CCValid; + if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && + CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && + isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) + return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); + } Comparison C(CmpOp0, CmpOp1); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { @@ -1615,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, // Emit the comparison instruction described by C. 
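The CC-mask construction in getIntrinsicCmp above is terse: the machine condition code is a value 0 to 3, and bit (3 - c) of a four-bit mask means "branch when CC equals c". Here is a standalone model of that arithmetic, not part of the commit: the Cond enum and ccMaskFor are hypothetical stand-ins for ISD::CondCode and the in-tree logic, and unsigned shifts replace the source's -1 << n.

#include <cassert>

enum Cond { EQ, NE, LT, GE, LE, GT }; // stand-in for ISD::CondCode

// Bit (3 - c) set in the result means "branch when the condition code is c".
unsigned ccMaskFor(Cond Cnd, unsigned CC, unsigned CCValid) {
  unsigned Mask = 0;
  switch (Cnd) {
  case EQ: Mask = CC < 4 ? 1u << (3 - CC) : 0u; break;
  case NE: Mask = CC < 4 ? ~(1u << (3 - CC)) : ~0u; break;
  case LT: Mask = CC < 4 ? ~0u << (4 - CC) : ~0u; break; // CC values below CC
  case GE: Mask = CC < 4 ? ~(~0u << (4 - CC)) : 0u; break;
  case LE: Mask = CC < 4 ? ~0u << (3 - CC) : ~0u; break; // CC values up to CC
  case GT: Mask = CC < 4 ? ~(~0u << (3 - CC)) : 0u; break;
  }
  return Mask & CCValid;
}

int main() {
  // tbegin returns CC in {0..3}; CCMASK_TBEGIN covers all four bits (0xF).
  assert(ccMaskFor(EQ, 0, 0xF) == 0x8); // "cc == 0": transaction started
  assert(ccMaskFor(LT, 2, 0xF) == 0xC); // "cc < 2": CC 0 or CC 1
  assert(ccMaskFor(NE, 3, 0xF) == 0xE); // "cc != 3": everything but bit 0
  return 0;
}

With the mask in hand, the emitCmp routine below materializes the comparison itself.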
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { + if (!C.Op1.getNode()) { + SDValue Op; + switch (C.Op0.getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); + break; + default: + llvm_unreachable("Invalid comparison operands"); + } + return SDValue(Op.getNode(), Op->getNumValues() - 1); + } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, DAG.getConstant(C.ICmpType, MVT::i32)); @@ -1696,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); @@ -1706,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); SDValue Glue = emitCmp(DAG, DL, C); return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), - Chain, DAG.getConstant(C.CCValid, MVT::i32), + Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32), DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); } @@ -2100,6 +2222,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -2292,6 +2415,46 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + int64_t OrigBitSize = VT.getSizeInBits(); + SDLoc DL(Op); + + // Get the known-zero mask for the operand. + Op = Op.getOperand(0); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Op, KnownZero, KnownOne); + unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + if (NumSignificantBits == 0) + return DAG.getConstant(0, VT); + + // Skip known-zero high parts of the operand. + int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); + BitSize = std::min(BitSize, OrigBitSize); + + // The POPCNT instruction counts the number of bits in each byte. + Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); + Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); + + // Add up per-byte counts in a binary tree. All bits of Op at + // position larger than BitSize remain zero throughout. + for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { + SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT)); + if (BitSize != OrigBitSize) + Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, + DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT)); + Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); + } + + // Extract overall result from high byte. + if (BitSize > 8) + Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT)); + + return Op; +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2505,6 +2668,30 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Node->getMemoryVT(), Node->getMemOperand()); } +// Return an i32 that contains the value of CC immediately after After, +// whose final operand must be MVT::Glue. 
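The lowerCTPOP routine above relies on POPCNT delivering a separate bit count in each byte; the shift-and-add tree then folds the eight counts into the high byte, and no step can carry across a byte boundary because every partial sum stays at or below 64. A self-contained sketch of the 64-bit case follows; it is not code from the commit, popcntPerByte and ctpop64 are hypothetical names, and __builtin_popcountll stands in for the hardware instruction.

#include <cassert>
#include <cstdint>

// Model of SystemZ POPCNT: byte B of the result holds the population
// count of byte B of the operand.
uint64_t popcntPerByte(uint64_t V) {
  uint64_t R = 0;
  for (int I = 0; I < 8; ++I) {
    uint64_t Byte = (V >> (8 * I)) & 0xff;
    R |= (uint64_t)__builtin_popcountll(Byte) << (8 * I);
  }
  return R;
}

// The shift/add tree from lowerCTPOP, specialized to a full 64-bit operand.
// (The in-tree code also narrows BitSize using known-zero bits and masks
// partial sums when the type is narrower than the shift tree.)
uint64_t ctpop64(uint64_t X) {
  uint64_t Op = popcntPerByte(X);
  for (int I = 32; I >= 8; I /= 2)
    Op += Op << I; // each step doubles how many byte counts are merged
  return Op >> 56; // the grand total accumulates in the high byte
}

int main() {
  assert(ctpop64(0) == 0);
  assert(ctpop64(0xff) == 8);
  assert(ctpop64(~0ull) == 64);
  assert(ctpop64(0x8000000000000001ull) == 2);
  return 0;
}

Returning to the intrinsic plumbing: getCCResult below recovers the CC value as an i32 by inserting an IPM and shifting the result down.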
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { + SDValue Glue = SDValue(After, After->getNumValues() - 1); + SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue); + return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM, + DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); +} + +SDValue +SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned Opcode, CCValid; + if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { + assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); + SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); + SDValue CC = getCCResult(DAG, Glued.getNode()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); + return SDValue(); + } + + return SDValue(); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2542,6 +2729,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::CTPOP: + return lowerCTPOP(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: @@ -2576,6 +2765,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return lowerINTRINSIC_W_CHAIN(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -2616,6 +2807,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(SERIALIZE); + OPCODE(TBEGIN); + OPCODE(TBEGIN_NOFLOAT); + OPCODE(TEND); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); @@ -3443,6 +3637,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, return DoneMBB; } +// Update TBEGIN instruction with final opcode and register clobbers. +MachineBasicBlock * +SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode, + bool NoFloat) const { + MachineFunction &MF = *MBB->getParent(); + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); + + // Update opcode. + MI->setDesc(TII->get(Opcode)); + + // We cannot handle a TBEGIN that clobbers the stack or frame pointer. + // Make sure to add the corresponding GRSM bits if they are missing. + uint64_t Control = MI->getOperand(2).getImm(); + static const unsigned GPRControlBit[16] = { + 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, + 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 + }; + Control |= GPRControlBit[15]; + if (TFI->hasFP(MF)) + Control |= GPRControlBit[11]; + MI->getOperand(2).setImm(Control); + + // Add GPR clobbers. + for (int I = 0; I < 16; I++) { + if ((Control & GPRControlBit[I]) == 0) { + unsigned Reg = SystemZMC::GR64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + // Add FPR clobbers. 
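In emitTransactionBegin above, the high halfword of the TBEGIN immediate is the general-register save mask (GRSM): one bit per even/odd register pair, MSB first, so 0x8000 covers r0/r1 and 0x0100 covers r14/r15. The pass forces the r14/r15 bit (the stack pointer pair) and, when a frame pointer is in use, the r10/r11 bit, then marks every unsaved GPR as clobbered, since its value after an abort is unpredictable. A standalone sketch of that bookkeeping, with hypothetical names: adjustTBeginControl is not an in-tree function, and the printf stands in for adding implicit-def operands to the MachineInstr.

#include <cstdint>
#include <cstdio>

// GRSM bit covering each GPR; pairs (0,1), (2,3), ... share a bit.
static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100};

uint64_t adjustTBeginControl(uint64_t Control, bool HasFP) {
  Control |= GPRControlBit[15]; // always save/restore the r14/r15 pair
  if (HasFP)
    Control |= GPRControlBit[11]; // keep the frame-pointer pair alive too
  for (int I = 0; I < 16; ++I)
    if ((Control & GPRControlBit[I]) == 0)
      printf("r%d clobbered across the transaction\n", I);
  return Control;
}

int main() {
  // Arbitrary example: save only pairs r8..r15 (mask 0x0f00) and set the
  // allow-FP bit (0x0004, handled separately in the real code); r0..r7
  // then show up as clobbers.
  adjustTBeginControl(0x0f04, /*HasFP=*/false);
  return 0;
}

The loop that follows in the diff applies the same treatment to the FPRs whenever the F bit (0x0004) permits floating-point use inside the transaction.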
+ if (!NoFloat && (Control & 4) != 0) { + for (int I = 0; I < 16; I++) { + unsigned Reg = SystemZMC::FP64Regs[I]; + MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -3684,6 +3922,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); + case SystemZ::TBEGIN: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); + case SystemZ::TBEGIN_nofloat: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); + case SystemZ::TBEGINC: + return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 23c62c9..56d7ef4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -87,6 +87,9 @@ enum { // the number of the register. EXTRACT_ACCESS, + // Count number of bits set in operand 0 per byte. + POPCNT, + // Wrappers around the ISD opcodes of the same name. The output and // first input operands are GR128s. The trailing numbers are the // widths of the second operand in bits. @@ -143,6 +146,15 @@ enum { // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) SERIALIZE, + // Transaction begin. The first operand is the chain, the second + // the TDB pointer, and the third the immediate control field. + // Returns chain and glue. + TBEGIN, + TBEGIN_NOFLOAT, + + // Transaction end. Just the chain operand. Returns chain and glue. + TEND, + // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or // ATOMIC_LOAD_<op>. // @@ -213,6 +225,8 @@ public: EVT getSetCCResultType(LLVMContext &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool isLegalICmpImmediate(int64_t Imm) const override; + bool isLegalAddImmediate(int64_t Imm) const override; bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -302,6 +316,7 @@ private: SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, @@ -312,6 +327,7 @@ private: SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. 
@@ -349,6 +365,10 @@ private:
   MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
                                        MachineBasicBlock *BB,
                                        unsigned Opcode) const;
+  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+                                          MachineBasicBlock *MBB,
+                                          unsigned Opcode,
+                                          bool NoFloat) const;
 };
 } // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 9f59a1c..2d3c9e2 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{15-0} = BD2;
 }
 
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+  field bits<32> SoftFail = 0;
+
+  bits<16> BD2;
+
+  let Inst{31-16} = op;
+  let Inst{15-0} = BD2;
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction definitions with semantics
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5128993..3a02859 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -723,9 +723,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned Start, End;
     if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
       unsigned NewOpcode;
-      if (And.RegSize == 64)
+      if (And.RegSize == 64) {
         NewOpcode = SystemZ::RISBG;
-      else {
+        // Prefer RISBGN if available, since it does not clobber CC.
+        if (STI.hasMiscellaneousExtensions())
+          NewOpcode = SystemZ::RISBGN;
+      } else {
         NewOpcode = SystemZ::RISBMux;
         Start &= 31;
         End &= 31;
@@ -1146,17 +1149,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
 unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
   switch (Opcode) {
-  case SystemZ::L: return SystemZ::LT;
-  case SystemZ::LY: return SystemZ::LT;
-  case SystemZ::LG: return SystemZ::LTG;
-  case SystemZ::LGF: return SystemZ::LTGF;
-  case SystemZ::LR: return SystemZ::LTR;
-  case SystemZ::LGFR: return SystemZ::LTGFR;
-  case SystemZ::LGR: return SystemZ::LTGR;
-  case SystemZ::LER: return SystemZ::LTEBR;
-  case SystemZ::LDR: return SystemZ::LTDBR;
-  case SystemZ::LXR: return SystemZ::LTXBR;
-  default: return 0;
+  case SystemZ::L:      return SystemZ::LT;
+  case SystemZ::LY:     return SystemZ::LT;
+  case SystemZ::LG:     return SystemZ::LTG;
+  case SystemZ::LGF:    return SystemZ::LTGF;
+  case SystemZ::LR:     return SystemZ::LTR;
+  case SystemZ::LGFR:   return SystemZ::LTGFR;
+  case SystemZ::LGR:    return SystemZ::LTGR;
+  case SystemZ::LER:    return SystemZ::LTEBR;
+  case SystemZ::LDR:    return SystemZ::LTDBR;
+  case SystemZ::LXR:    return SystemZ::LTXBR;
+  // On zEC12 we prefer to use RISBGN. But if there is a chance to
+  // actually use the condition code, we may turn it back into RISBG.
+  // Note that RISBG is not really a "load-and-test" instruction,
+  // but sets the same condition code values, so is OK to use here.
+  case SystemZ::RISBGN: return SystemZ::RISBG;
+  default: return 0;
   }
 }
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index a7f7747..820f30b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1061,6 +1061,10 @@ let Defs = [CC] in {
   def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
 }
 
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in + def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; + // Forms of RISBG that only affect one word of the destination register. // They do not set CC. let Predicates = [FeatureHighWord] in { @@ -1358,6 +1362,60 @@ let Defs = [CC] in { } //===----------------------------------------------------------------------===// +// Transactional execution +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureTransactionalExecution] in { + // Transaction Begin + let hasSideEffects = 1, mayStore = 1, + usesCustomInserter = 1, Defs = [CC] in { + def TBEGIN : InstSIL<0xE560, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbegin\t$BD1, $I2", + [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; + def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + [(z_tbegin_nofloat bdaddr12only:$BD1, + imm32zx16:$I2)]>; + def TBEGINC : InstSIL<0xE561, + (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), + "tbeginc\t$BD1, $I2", + [(int_s390_tbeginc bdaddr12only:$BD1, + imm32zx16:$I2)]>; + } + + // Transaction End + let hasSideEffects = 1, Defs = [CC], BD2 = 0 in + def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; + + // Transaction Abort + let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in + def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), + "tabort\t$BD2", + [(int_s390_tabort bdaddr12only:$BD2)]>; + + // Nontransactional Store + let hasSideEffects = 1 in + def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; + + // Extract Transaction Nesting Depth + let hasSideEffects = 1 in + def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; +} + +//===----------------------------------------------------------------------===// +// Processor assist +//===----------------------------------------------------------------------===// + +let Predicates = [FeatureProcessorAssist] in { + let hasSideEffects = 1, R4 = 0 in + def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), + "ppa\t$R1, $R2, $R3", []>; + def : Pat<(int_s390_ppa_txassist GR32:$src), + (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), + 0, 1)>; +} + +//===----------------------------------------------------------------------===// // Miscellaneous Instructions. //===----------------------------------------------------------------------===// @@ -1382,6 +1440,13 @@ let Defs = [CC] in { def : Pat<(ctlz GR64:$src), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; +// Population count. Counts bits set per byte. +let Predicates = [FeaturePopulationCount], Defs = [CC] in { + def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), + "popcnt\t$R1, $R2", + [(set GR64:$R1, (z_popcnt GR64:$R2))]>; +} + // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. 
def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 51ac5da..3151052 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; +def SDT_ZTBegin : SDTypeProfile<0, 2, + [SDTCisPtrTy<0>, + SDTCisVT<1, i32>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -121,6 +124,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; +def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; @@ -179,6 +183,15 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, + [SDNPHasChain, SDNPOutGlue, SDNPMayStore, + SDNPSideEffect]>; +def z_tend : SDNode<"SystemZISD::TEND", SDTNone, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Pattern fragments //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td index 1594854..15614c9 100644 --- a/lib/Target/SystemZ/SystemZProcessors.td +++ b/lib/Target/SystemZ/SystemZProcessors.td @@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature< "Assume that the floating-point extension facility is installed" >; +def FeaturePopulationCount : SystemZFeature< + "population-count", "PopulationCount", + "Assume that the population-count facility is installed" +>; + def FeatureFastSerialization : SystemZFeature< "fast-serialization", "FastSerialization", "Assume that the fast-serialization facility is installed" @@ -50,13 +55,30 @@ def FeatureInterlockedAccess1 : SystemZFeature< >; def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; +def FeatureMiscellaneousExtensions : SystemZFeature< + "miscellaneous-extensions", "MiscellaneousExtensions", + "Assume that the miscellaneous-extensions facility is installed" +>; + +def FeatureTransactionalExecution : SystemZFeature< + "transactional-execution", "TransactionalExecution", + "Assume that the transactional-execution facility is installed" +>; + +def FeatureProcessorAssist : SystemZFeature< + "processor-assist", "ProcessorAssist", + "Assume that the processor-assist facility is installed" +>; + def : Processor<"generic", NoItineraries, []>; def : Processor<"z10", NoItineraries, []>; def : Processor<"z196", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1]>; 
def : Processor<"zEC12", NoItineraries, [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, - FeatureFPExtension, FeatureFastSerialization, - FeatureInterlockedAccess1]>; + FeatureFPExtension, FeaturePopulationCount, + FeatureFastSerialization, FeatureInterlockedAccess1, + FeatureMiscellaneousExtensions, + FeatureTransactionalExecution, FeatureProcessorAssist]>; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index 31a2bff..de725ae 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -10,7 +10,6 @@ #include "SystemZSubtarget.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Host.h" using namespace llvm; @@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; -#if defined(__linux__) && defined(__s390x__) - if (CPUName == "generic") - CPUName = sys::getHostCPUName(); -#endif // Parse features string. ParseSubtargetFeatures(CPUName, FS); return *this; @@ -43,7 +38,9 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasFastSerialization(false), HasInterlockedAccess1(false), + HasPopulationCount(false), HasFastSerialization(false), + HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), + HasTransactionalExecution(false), HasProcessorAssist(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {} diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 99cb1ad..c99e552 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -38,8 +38,12 @@ protected: bool HasLoadStoreOnCond; bool HasHighWord; bool HasFPExtension; + bool HasPopulationCount; bool HasFastSerialization; bool HasInterlockedAccess1; + bool HasMiscellaneousExtensions; + bool HasTransactionalExecution; + bool HasProcessorAssist; private: Triple TargetTriple; @@ -86,12 +90,26 @@ public: // Return true if the target has the floating-point extension facility. bool hasFPExtension() const { return HasFPExtension; } + // Return true if the target has the population-count facility. + bool hasPopulationCount() const { return HasPopulationCount; } + // Return true if the target has the fast-serialization facility. bool hasFastSerialization() const { return HasFastSerialization; } // Return true if the target has interlocked-access facility 1. bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + // Return true if the target has the miscellaneous-extensions facility. + bool hasMiscellaneousExtensions() const { + return HasMiscellaneousExtensions; + } + + // Return true if the target has the transactional-execution facility. + bool hasTransactionalExecution() const { return HasTransactionalExecution; } + + // Return true if the target has the processor-assist facility. + bool hasProcessorAssist() const { return HasProcessorAssist; } + // Return true if GV can be accessed using LARL for reloc model RM // and code model CM. 
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 86baccb..b2f8175 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" @@ -108,3 +109,9 @@ void SystemZPassConfig::addPreEmitPass() { TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { return new SystemZPassConfig(this, PM); } + +TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(SystemZTTIImpl(this, F)); + }); +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 181b926..5ded07c 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -39,6 +39,7 @@ public: } // Override LLVMTargetMachine TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp new file mode 100644 index 0000000..3337f63 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -0,0 +1,240 @@ +//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a TargetTransformInfo analysis pass specific to the +// SystemZ target machine. It uses the target's detailed information to provide +// more precise answers to certain TTI queries, while letting the target +// independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/CostTable.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +#define DEBUG_TYPE "systemztti" + +//===----------------------------------------------------------------------===// +// +// SystemZ cost model. +// +//===----------------------------------------------------------------------===// + +unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + if (Imm == 0) + return TTI::TCC_Free; + + if (Imm.getBitWidth() <= 64) { + // Constants loaded via lgfi. 
+    if (isInt<32>(Imm.getSExtValue()))
+      return TTI::TCC_Basic;
+    // Constants loaded via llilf.
+    if (isUInt<32>(Imm.getZExtValue()))
+      return TTI::TCC_Basic;
+    // Constants loaded via llihf:
+    if ((Imm.getZExtValue() & 0xffffffff) == 0)
+      return TTI::TCC_Basic;
+
+    return 2 * TTI::TCC_Basic;
+  }
+
+  return 4 * TTI::TCC_Basic;
+}
+
+unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+                                       const APInt &Imm, Type *Ty) {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
+  // here, so that constant hoisting will ignore this constant.
+  if (BitSize == 0)
+    return TTI::TCC_Free;
+  // No cost model for operations on integers larger than 64 bit implemented yet.
+  if (BitSize > 64)
+    return TTI::TCC_Free;
+
+  switch (Opcode) {
+  default:
+    return TTI::TCC_Free;
+  case Instruction::GetElementPtr:
+    // Always hoist the base address of a GetElementPtr. This prevents the
+    // creation of new constants for every base constant that gets constant
+    // folded with the offset.
+    if (Idx == 0)
+      return 2 * TTI::TCC_Basic;
+    return TTI::TCC_Free;
+  case Instruction::Store:
+    if (Idx == 0 && Imm.getBitWidth() <= 64) {
+      // Any 8-bit immediate store can be implemented via mvi.
+      if (BitSize == 8)
+        return TTI::TCC_Free;
+      // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+      if (isInt<16>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::ICmp:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Comparisons against signed 32-bit immediates implemented via cgfi.
+      if (isInt<32>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+      // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // Or their negation, by swapping addition vs. subtraction.
+      if (isUInt<32>(-Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Mul:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // We use msgfi to multiply by 32-bit signed immediates.
+      if (isInt<32>(Imm.getSExtValue()))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Or:
+  case Instruction::Xor:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Masks supported by oilf/xilf.
+      if (isUInt<32>(Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // Masks supported by oihf/xihf.
+      if ((Imm.getZExtValue() & 0xffffffff) == 0)
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::And:
+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
+      // Any 32-bit AND operation can be implemented via nilf.
+      if (BitSize <= 32)
+        return TTI::TCC_Free;
+      // 64-bit masks supported by nilf.
+      if (isUInt<32>(~Imm.getZExtValue()))
+        return TTI::TCC_Free;
+      // 64-bit masks supported by nilh.
+      if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+        return TTI::TCC_Free;
+      // Some 64-bit AND operations can be implemented via risbg.
+      const SystemZInstrInfo *TII = ST->getInstrInfo();
+      unsigned Start, End;
+      if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+        return TTI::TCC_Free;
+    }
+    break;
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1) + return TTI::TCC_Free; + break; + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TTI::TCC_Free; + // No cost model for operations on integers larger than 64 bit implemented yet. + if (BitSize > 64) + return TTI::TCC_Free; + + switch (IID) { + default: + return TTI::TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + // These get expanded to include a normal addition/subtraction. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + if (isUInt<32>(-Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // These get expanded to include a normal multiplication. + if (Idx == 1 && Imm.getBitWidth() <= 64) { + if (isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TTI::TCC_Free; + break; + } + return SystemZTTIImpl::getIntImmCost(Imm, Ty); +} + +TargetTransformInfo::PopcntSupportKind +SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { + assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); + if (ST->hasPopulationCount() && TyWidth <= 64) + return TTI::PSK_FastHardware; + return TTI::PSK_Software; +} + diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h new file mode 100644 index 0000000..d498913 --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H + +#include "SystemZTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { + typedef BasicTTIImplBase<SystemZTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const SystemZSubtarget *ST; + const SystemZTargetLowering *TLI; + + const SystemZSubtarget *getST() const { return ST; } + const SystemZTargetLowering *getTLI() const { return TLI; } + +public: + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) + : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + SystemZTTIImpl(const SystemZTTIImpl &Arg) + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + SystemZTTIImpl(SystemZTTIImpl &&Arg) + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) { + BaseT::operator=(static_cast<const BaseT &>(RHS)); + ST = RHS.ST; + TLI = RHS.TLI; + return *this; + } + SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) { + BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); + ST = std::move(RHS.ST); + TLI = std::move(RHS.TLI); + return *this; + } + + /// \name Scalar TTI Implementations + /// @{ + + unsigned getIntImmCost(const APInt &Imm, Type *Ty); + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty); + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 75100fb..db543f3 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -110,7 +110,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase( NameStr += DL->getPrivateGlobalPrefix(); TM.getNameWithPrefix(NameStr, GV, Mang); NameStr.append(Suffix.begin(), Suffix.end()); - return Ctx->GetOrCreateSymbol(NameStr.str()); + return Ctx->GetOrCreateSymbol(NameStr); } MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol( diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index dd07f81..5807cf7 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -187,5 +187,5 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const { SmallString<60> NameStr; getNameWithPrefix(NameStr, GV, Mang); const TargetLoweringObjectFile *TLOF = getObjFileLowering(); - return TLOF->getContext().GetOrCreateSymbol(NameStr.str()); + return TLOF->getContext().GetOrCreateSymbol(NameStr); } diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 236cb1b..1a5bf16 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -183,7 +183,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T, } static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, - formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) { + raw_pwrite_stream &OS, + LLVMCodeGenFileType codegen, + char **ErrorMessage) { 
TargetMachine* TM = unwrap(T); Module* Mod = unwrap(M); @@ -229,8 +231,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, *ErrorMessage = strdup(EC.message().c_str()); return true; } - formatted_raw_ostream destf(dest); - bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage); + bool Result = LLVMTargetMachineEmit(T, M, dest, codegen, ErrorMessage); dest.flush(); return Result; } @@ -238,15 +239,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage, LLVMMemoryBufferRef *OutMemBuf) { - std::string CodeString; - raw_string_ostream OStream(CodeString); - formatted_raw_ostream Out(OStream); - bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage); + SmallString<0> CodeString; + raw_svector_ostream OStream(CodeString); + bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage); OStream.flush(); - std::string &Data = OStream.str(); - *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(), - Data.length(), ""); + StringRef Data = OStream.str(); + *OutMemBuf = + LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), ""); return Result; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index c24805a..93c6ea0 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2571,7 +2571,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, SmallString<16> Tmp; Tmp += Base; Tmp += ' '; - Op.setTokenValue(Tmp.str()); + Op.setTokenValue(Tmp); // If this instruction starts with an 'f', then it is a floating point stack // instruction. These come in up to three forms for 32-bit, 64-bit, and diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 65461af..f265f1d 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -33,15 +33,12 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -void X86ATTInstPrinter::printRegName(raw_ostream &OS, - unsigned RegNo) const { - OS << markup("<reg:") - << '%' << getRegisterName(RegNo) - << markup(">"); +void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">"); } void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; @@ -60,7 +57,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, // InstrInfo.td as soon as Requires clause is supported properly // for InstAlias. if (MI->getOpcode() == X86::CALLpcrel32 && - (getAvailableFeatures() & X86::Mode64Bit) != 0) { + (STI.getFeatureBits() & X86::Mode64Bit) != 0) { OS << "\tcallq\t"; printPCRelImm(MI, 0, OS); } @@ -169,8 +166,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printRegName(O, Op.getReg()); } else if (Op.isImm()) { // Print X86 immediates as signed values. 
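Earlier in this chunk, LLVMTargetMachineEmitToMemoryBuffer switches from a raw_string_ostream wrapped in a formatted_raw_ostream to a SmallString-backed raw_svector_ostream, so the emitted bytes are handed back without an intermediate std::string copy. A minimal sketch of that idiom, assuming only the LLVM support headers (not part of the patch):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    // str() returns a StringRef that points into Buf itself, so the caller
    // can copy the bytes out exactly once, as the C API binding above does.
    llvm::StringRef emitDemo(llvm::SmallString<0> &Buf) {
      llvm::raw_svector_ostream OS(Buf);
      OS << "object bytes";
      return OS.str();
    }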
- O << markup("<imm:") - << '$' << formatImm((int64_t)Op.getImm()) + O << markup("<imm:") << '$' << formatImm((int64_t)Op.getImm()) << markup(">"); // If there are no instruction-specific comments, add a comment clarifying @@ -182,24 +178,22 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << markup("<imm:") - << '$' << *Op.getExpr() - << markup(">"); + O << markup("<imm:") << '$' << *Op.getExpr() << markup(">"); } } void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O) { - const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); - const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); - const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp); - const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg); + const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg); + const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg); + const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp); + const MCOperand &SegReg = MI->getOperand(Op + X86::AddrSegmentReg); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+X86::AddrSegmentReg, O); + printOperand(MI, Op + X86::AddrSegmentReg, O); O << ':'; } @@ -215,16 +209,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, if (IndexReg.getReg() || BaseReg.getReg()) { O << '('; if (BaseReg.getReg()) - printOperand(MI, Op+X86::AddrBaseReg, O); + printOperand(MI, Op + X86::AddrBaseReg, O); if (IndexReg.getReg()) { O << ','; - printOperand(MI, Op+X86::AddrIndexReg, O); - unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm(); + printOperand(MI, Op + X86::AddrIndexReg, O); + unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm(); if (ScaleVal != 1) { - O << ',' - << markup("<imm:") - << ScaleVal // never printed in hex. + O << ',' << markup("<imm:") << ScaleVal // never printed in hex. << markup(">"); } } @@ -236,13 +228,13 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op, raw_ostream &O) { - const MCOperand &SegReg = MI->getOperand(Op+1); + const MCOperand &SegReg = MI->getOperand(Op + 1); O << markup("<mem:"); // If this has a segment register, print it. if (SegReg.getReg()) { - printOperand(MI, Op+1, O); + printOperand(MI, Op + 1, O); O << ':'; } @@ -267,13 +259,13 @@ void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, raw_ostream &O) { const MCOperand &DispSpec = MI->getOperand(Op); - const MCOperand &SegReg = MI->getOperand(Op+1); + const MCOperand &SegReg = MI->getOperand(Op + 1); O << markup("<mem:"); // If this has a segment register, print it. 
if (SegReg.getReg()) { - printOperand(MI, Op+1, O); + printOperand(MI, Op + 1, O); O << ':'; } @@ -289,7 +281,6 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op, void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op, raw_ostream &O) { - O << markup("<imm:") - << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff) + O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff) << markup(">"); } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index f71cb81..62b6b73 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -24,14 +24,12 @@ class MCOperand; class X86ATTInstPrinter final : public MCInstPrinter { public: X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); - } + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen, returns true if we successfully printed an // alias. @@ -142,7 +140,6 @@ public: private: bool HasCustomInstComment; }; - } #endif diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 91d1828..4d92daf 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -33,7 +33,8 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, - StringRef Annot) { + StringRef Annot, + const MCSubtargetInfo &STI) { const MCInstrDesc &Desc = MII.get(MI->getOpcode()); uint64_t TSFlags = Desc.TSFlags; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 2150144..6e371da 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -28,7 +28,8 @@ public: : MCInstPrinter(MAI, MII, MRI) {} void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, + const MCSubtargetInfo &STI) override; // Autogenerated by tblgen. 
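The printer changes in these headers share one theme: printInst now receives the MCSubtargetInfo with every call instead of the printer caching feature bits at construction, so a single MCInstPrinter instance can serve functions built for different subtargets (the CALLpcrel32 callq alias above already reads STI.getFeatureBits()). A hedged sketch of a call site under the new signature:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstPrinter.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/raw_ostream.h"

    // Feature-dependent aliases are resolved from the STI passed per
    // instruction, not from state frozen when the printer was created.
    void printOne(llvm::MCInstPrinter &IP, const llvm::MCInst &Inst,
                  llvm::raw_ostream &OS, const llvm::MCSubtargetInfo &STI) {
      IP.printInst(&Inst, OS, /*Annot=*/"", STI);
    }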
void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a400d46..b84c983 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -360,7 +360,7 @@ public: ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); } }; @@ -370,7 +370,7 @@ public: ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_X86_64); } @@ -381,7 +381,7 @@ public: ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) : ELFX86AsmBackend(T, OSABI, CPU) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); } }; @@ -395,7 +395,7 @@ public: , Is64Bit(is64Bit) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86WinCOFFObjectWriter(OS, Is64Bit); } }; @@ -752,7 +752,7 @@ public: StringRef CPU) : DarwinX86AsmBackend(T, MRI, CPU, false) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_I386, MachO::CPU_SUBTYPE_I386_ALL); @@ -772,7 +772,7 @@ public: StringRef CPU, MachO::CPUSubTypeX86 st) : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, MachO::CPU_TYPE_X86_64, Subtype); } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 76a9d2b..4508883 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -22,7 +22,8 @@ namespace { public: X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine); - virtual ~X86ELFObjectWriter(); + ~X86ELFObjectWriter() override; + protected: unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const override; @@ -248,9 +249,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, return getRelocType32(Modifier, getType32(Type), IsPCRel); } -MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS, - bool IsELF64, - uint8_t OSABI, +MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS, + bool IsELF64, uint8_t OSABI, uint16_t EMachine) { MCELFObjectTargetWriter *MOTW = new X86ELFObjectWriter(IsELF64, OSABI, EMachine); diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 9b98a3e..e27b7cb 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ 
b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -39,7 +39,7 @@ public: : MCII(mcii), Ctx(ctx) { } - ~X86MCCodeEmitter() {} + ~X86MCCodeEmitter() override {} bool is64BitMode(const MCSubtargetInfo &STI) const { return (STI.getFeatureBits() & X86::Mode64Bit) != 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 0946326..5bdd844 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -80,7 +80,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, std::string ArchFS = X86_MC::ParseX86Triple(TT); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -207,14 +207,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createX86MCInstPrinter(const Target &T, +static MCInstPrinter *createX86MCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI, MII, MRI, STI); + return new X86ATTInstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) return new X86IntelInstPrinter(MAI, MII, MRI); return nullptr; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 6f50f11..dcdae1d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -31,10 +31,11 @@ class Target; class Triple; class StringRef; class raw_ostream; +class raw_pwrite_stream; extern Target TheX86_32Target, TheX86_64Target; -/// DWARFFlavour - Flavour of dwarf regnumbers +/// Flavour of dwarf regnumbers /// namespace DWARFFlavour { enum { @@ -42,7 +43,7 @@ namespace DWARFFlavour { }; } -/// N86 namespace - Native X86 register numbers +/// Native X86 register numbers /// namespace N86 { enum { @@ -57,9 +58,8 @@ namespace X86_MC { void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); - /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance. - /// This is exposed so Asm parser, etc. do not need to go through - /// TargetRegistry. + /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc. + /// do not need to go through TargetRegistry. MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS); } @@ -78,27 +78,25 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, /// /// Takes ownership of \p AB and \p CE. MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_ostream &OS, MCCodeEmitter *CE, + raw_pwrite_stream &OS, MCCodeEmitter *CE, bool RelaxAll); -/// createX86MachObjectWriter - Construct an X86 Mach-O object writer. -MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, +/// Construct an X86 Mach-O object writer. +MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype); -/// createX86ELFObjectWriter - Construct an X86 ELF object writer. -MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS, - bool IsELF64, - uint8_t OSABI, - uint16_t EMachine); -/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer. 
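Every createX86*ObjectWriter factory in this header now takes raw_pwrite_stream rather than raw_ostream. The stronger type matters because object writers seek back and patch headers once section sizes are known, and pwrite expresses exactly that. A small sketch of the capability (helper name assumed, not from the patch):

    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    #include <cstring>

    // Overwrite a 32-bit field at an offset that was already written; a
    // plain raw_ostream has no way to express this back-patching.
    void patchWord(llvm::raw_pwrite_stream &OS, uint64_t Offset, uint32_t Value) {
      char Bytes[4];
      std::memcpy(Bytes, &Value, sizeof(Bytes)); // host byte order; a real
      OS.pwrite(Bytes, sizeof(Bytes), Offset);   // writer encodes endianness
    }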
-MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); +/// Construct an X86 ELF object writer. +MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64, + uint8_t OSABI, uint16_t EMachine); +/// Construct an X86 Win COFF object writer. +MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); -/// createX86_64MachORelocationInfo - Construct X86-64 Mach-O relocation info. +/// Construct X86-64 Mach-O relocation info. MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx); -/// createX86_64ELFORelocationInfo - Construct X86-64 ELF relocation info. +/// Construct X86-64 ELF relocation info. MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx); } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 7a83f4c..38539cd 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -575,9 +575,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS, - bool Is64Bit, - uint32_t CPUType, +MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype) { return createMachObjectWriter(new X86MachObjectWriter(Is64Bit, CPUType, diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index e1df5c2..bd1bc99 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -25,7 +25,7 @@ namespace { class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { public: X86WinCOFFObjectWriter(bool Is64Bit); - virtual ~X86WinCOFFObjectWriter(); + ~X86WinCOFFObjectWriter() override; unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, @@ -90,7 +90,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, llvm_unreachable("Unsupported COFF machine type."); } -MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS, +MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit); return createWinCOFFObjectWriter(MOTW, OS); diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 5690efe..92f42b6 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -18,8 +18,8 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer { Win64EH::UnwindEmitter EHStreamer; public: X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE, - raw_ostream &OS) - : MCWinCOFFStreamer(C, AB, *CE, OS) { } + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, *CE, OS) {} void EmitWinEHHandlerData() override; void EmitWindowsUnwindTables() override; @@ -49,8 +49,8 @@ void X86WinCOFFStreamer::FinishImpl() { } MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll) { + raw_pwrite_stream &OS, + MCCodeEmitter *CE, bool RelaxAll) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); return S; diff --git a/lib/Target/X86/X86.td 
b/lib/Target/X86/X86.td index 4f9836d..d13f155 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -273,17 +273,15 @@ def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. // "Arrandale" along with corei3 and corei5 -class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures> - : ProcessorModel<Name, SandyBridgeModel, !listconcat([ - FeatureSSE42, - FeatureCMPXCHG16B, - FeatureSlowBTMem, - FeatureFastUAMem, - FeaturePOPCNT - ], - AdditionalFeatures)>; -def : NehalemProc<"nehalem", []>; -def : NehalemProc<"corei7", [FeatureAES]>; +class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ + FeatureSSE42, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureFastUAMem, + FeaturePOPCNT + ]>; +def : NehalemProc<"nehalem">; +def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f6033a7..2ed4975 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -523,7 +523,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { // must be registered in .sxdata. Use of any unregistered handlers will // cause the process to terminate immediately. LLVM does not know how to // register any SEH handlers, so its object files should be safe. - S->setAbsolute(); OutStreamer.EmitSymbolAttribute(S, MCSA_Global); OutStreamer.EmitAssignment( S, MCConstantExpr::Create(int64_t(1), MMI->getContext())); @@ -723,28 +722,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } - if (TT.isOSBinFormatELF()) { - const TargetLoweringObjectFileELF &TLOFELF = - static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); - - MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); - - // Output stubs for external and common global variables. - MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); - if (!Stubs.empty()) { - OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); - - for (const auto &Stub : Stubs) { - OutStreamer.EmitLabel(Stub.first); - OutStreamer.EmitSymbolValue(Stub.second.getPointer(), - TD->getPointerSize()); - } - Stubs.clear(); - } - + if (TT.isOSBinFormatELF()) SM.serializeToStackMapSection(); - } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cba140f..cdf10a7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2417,6 +2417,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. 
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM) .addImm(0) .addMetadata(DI->getVariable()) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index fb12ce5..5da7acf 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2187,7 +2187,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) break; - unsigned ShlOp, Op; + unsigned ShlOp, AddOp, Op; MVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2208,6 +2208,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: assert(CstVT == MVT::i8); ShlOp = X86::SHL32ri; + AddOp = X86::ADD32rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2219,6 +2220,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i64: assert(CstVT == MVT::i8 || CstVT == MVT::i32); ShlOp = X86::SHL64ri; + AddOp = X86::ADD64rr; switch (Opcode) { default: llvm_unreachable("Impossible opcode"); @@ -2232,6 +2234,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Emit the smaller op and the shift. SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + if (ShlVal == 1) + return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0), + SDValue(New, 0)); return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), getI8Imm(ShlVal)); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8b92e70..c32412a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -2142,6 +2143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -2277,6 +2279,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && @@ -2416,6 +2419,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MFI->CreateFixedObject(1, StackSize, true)); } + MachineModuleInfo &MMI = MF.getMMI(); + const Function *WinEHParent = nullptr; + if (IsWin64 && MMI.hasWinEHFuncInfo(Fn)) + WinEHParent = MMI.getWinEHParent(Fn); + bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; + bool IsWinEHParent = WinEHParent && WinEHParent == Fn; + // Figure out if XMM registers are in use. assert(!(MF.getTarget().Options.UseSoftFloat && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2452,7 +2462,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } if (IsWin64) { - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); // Get to the caller-allocated home save location. Add 8 to account // for the return address. 
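In the X86ISelDAGToDAG hunk above, a shrunken constant whose reconstructed shift amount works out to 1 is now selected as ADD32rr/ADD64rr rather than a shift. The identity it leans on, as a one-line sketch (on many x86 cores a register-register add issues on more execution ports than a shift):

    #include <cstdint>

    // x << 1 == x + x; the selector re-emits the shift-by-one as an add.
    uint64_t shlByOne(uint64_t X) { return X + X; }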
int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2505,6 +2514,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + } else if (IsWinEHOutlined) { + // Get to the caller-allocated home save location. Add 8 to account + // for the return address. + int HomeOffset = TFI.getOffsetOfLocalArea() + 8; + FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject( + /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false)); + + MMI.getWinEHFuncInfo(Fn) + .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] = + FuncInfo->getRegSaveFrameIndex(); + + // Store the second integer parameter (rdx) into rsp+16 relative to the + // stack pointer at the entry of the function. + SDValue RSFIN = + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); + unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, RSFIN, + MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()), + /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { @@ -2571,6 +2601,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); + if (IsWinEHParent) { + int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); + MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; + SDValue Neg2 = DAG.getConstant(-2, MVT::i64); + Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, + MachinePointerInfo::getFixedStack(UnwindHelpFI), + /*isVolatile=*/true, + /*isNonTemporal=*/false, /*Alignment=*/0); + } + return Chain; } @@ -4420,6 +4461,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, SDLoc dl(Op); SDValue V; bool First = true; + + // SSE4.1 - use PINSRB to insert each byte directly. + if (Subtarget->hasSSE41()) { + for (unsigned i = 0; i < 16; ++i) { + bool isNonZero = (NonZeros & (1 << i)) != 0; + if (isNonZero) { + if (First) { + if (NumZero) + V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); + else + V = DAG.getUNDEF(MVT::v16i8); + First = false; + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + MVT::v16i8, V, Op.getOperand(i), + DAG.getIntPtrConstant(i)); + } + } + + return V; + } + + // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { @@ -5650,14 +5714,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } + // We can't directly insert an i8 or i16 into a vector, so zero extend + // it to i32 first. if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); if (VT.is256BitVector()) { - SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); - Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + if (Subtarget->hasAVX()) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } else { + // Without AVX, we need to extend to a 128-bit vector and then + // insert into the 256-bit vector. 
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); + SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5877,7 +5951,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); - if(ResVT.is256BitVector()) + if (ResVT.is256BitVector()) return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { @@ -9281,15 +9355,6 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3})) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); - if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9432,15 +9497,6 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; }); - if (NumV2Elements == 1 && Mask[0] >= 8) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9811,6 +9867,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); + // If we have a single input to the zero element, insert that into V1 if we + // can do so cheaply. + int NumElts = VT.getVectorNumElements(); + int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) { + return M >= NumElts; + }); + + if (NumV2Elements == 1 && Mask[0] >= NumElts) + if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + DL, VT, V1, V2, Mask, Subtarget, DAG)) + return Insertion; + // There is a really nice hard cut-over between AVX1 and AVX2 that means we can // check for those subtargets here and avoid much of the subtarget querying in // the per-vector-type lowering routines. 
With AVX1 we have essentially *zero* @@ -11903,7 +11971,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, // Now we have only mask extension assert(InVT.getVectorElementType() == MVT::i1); SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -11979,7 +12047,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -12750,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, return SDValue(); } +/// If we have at least two divisions that use the same divisor, convert to +/// multiplication by a reciprocal. This may need to be adjusted for a given +/// CPU if a division's cost is not at least twice the cost of a multiplication. +/// This is because we still need one division to calculate the reciprocal and +/// then we need two multiplies by that reciprocal as replacements for the +/// original divisions. +bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { + return NumUsers > 1; +} + static bool isAllOnes(SDValue V) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); return C && C->isAllOnesValue(); } @@ -14427,7 +14505,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -15220,10 +15298,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, } case PREFETCH: { SDValue Hint = Op.getOperand(6); - unsigned HintVal; - if (dyn_cast<ConstantSDNode> (Hint) == nullptr || - (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1) - llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); + unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue(); + assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); @@ -24175,7 +24251,7 @@ TargetLowering::ConstraintWeight break; case 'G': case 'C': - if (dyn_cast<ConstantFP>(CallOperandVal)) { + if (isa<ConstantFP>(CallOperandVal)) { weight = CW_Constant; } break; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index dd20ec2..5130c37 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1072,6 +1072,9 @@ namespace llvm { /// Use rcp* to speed up fdiv calculations. SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; + + /// Reassociate floating point divisions into multiply by reciprocal.
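combineRepeatedFPDivisors returns true only for more than one user because the rewrite still pays for one division to form the reciprocal; with a single divide there is nothing to amortize. At the source level the transform looks like this sketch (relaxed FP semantics assumed, since A * (1/B) need not be bit-identical to A / B):

    // Two divisions by the same B become one divide plus two multiplies.
    void scaleBoth(double A, double C, double B, double &X, double &Y) {
      double R = 1.0 / B; // the single remaining division
      X = A * R;          // was A / B
      Y = C * R;          // was C / B
    }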
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override; }; namespace X86 { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 509602f..0959162 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2971,60 +2971,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, itins, HasBWI, IsCommutable>; } -multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT, - ValueType SrcVT, RegisterClass KRC, RegisterClass RC, - PatFrag memop_frag, X86MemOperand x86memop, - PatFrag scalar_mfrag, X86MemOperand x86scalar_mop, - string BrdcstStr, OpndItins itins, bit IsCommutable = 0> { - let isCommutable = IsCommutable in - { - def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V; - def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, RC:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"), - [], itins.rr>, EVEX_4V, EVEX_K; - def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, RC:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" , - "|$dst {${mask}} {z}, $src1, $src2}"), - [], itins.rr>, EVEX_4V, EVEX_KZ; - } +multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, + SDNode OpNode,X86VectorVTInfo _Src, + X86VectorVTInfo _Dst, bit IsCommutable = 0> { + defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, + "$src2, $src1","$src1, $src2", + (_Dst.VT (OpNode + (_Src.VT _Src.RC:$src1), + (_Src.VT _Src.RC:$src2))), + "",itins.rr, IsCommutable>, + AVX512BIBase, EVEX_4V; let mayLoad = 1 in { - def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, EVEX_4V; - def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"), - [], itins.rm>, EVEX_4V, EVEX_K; - def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86memop:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"), - [], itins.rm>, EVEX_4V, EVEX_KZ; - def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B; - def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}", - BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K; - def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst), - (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2), - !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr, - ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}", - BrdcstStr, "}"), - [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ; + defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), + (bitconvert (_Src.LdFrag addr:$src2)))), + "", itins.rm>, + AVX512BIBase, EVEX_4V; + + defm rmb : 
AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), + (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2), + OpcodeStr, + "${src2}"##_Dst.BroadcastStr##", $src1", + "$src1, ${src2}"##_Dst.BroadcastStr, + (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32 + (_Dst.VT (X86VBroadcast + (_Dst.ScalarLdFrag addr:$src2)))))), + "", itins.rm>, + AVX512BIBase, EVEX_4V, EVEX_B; } } @@ -3039,24 +3015,13 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512, - loadv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512, - EVEX_CD8<64, CD8VF>, VEX_W; - -defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512, - loadv8i64, i512mem, loadi64, i64mem, "{1to8}", - SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; +defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P, + X86pmuldq, v16i32_info, v8i64_info, 1>, + T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; -def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; - -def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULUDQZrr VR512:$src1, VR512:$src2)>; -def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1), - (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))), - (VPMULDQZrr VR512:$src1, VR512:$src2)>; +defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, + X86pmuludq, v16i32_info, v8i64_info, 1>, + EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; @@ -3208,7 +3173,7 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or, defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_INTALU_ITINS_P, HasAVX512, 1>; defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, - SSE_INTALU_ITINS_P, HasAVX512, 1>; + SSE_INTALU_ITINS_P, HasAVX512, 0>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -3743,16 +3708,19 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), - OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), - (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, + OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), + !strconcat("$src2, ${src3}", _.BroadcastStr ), + (OpNode _.RC:$src1, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; } } // Constraints = "$src1 = $dst" let Constraints = "$src1 = $dst" in { // Omitting the parameter OpNode (= null_frag) disables ISel pattern matching. 
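The avx512_binop_rm2 rewrite above collapses nine hand-written defs (rr/rrk/rrkz, rm/rmk/rmkz, rmb/rmbk/rmbkz) into AVX512_maskable invocations, which expand the plain, merge-masked {k}, and zero-masked {k}{z} encodings from one description. The user-visible shape of those three variants, sketched with AVX-512F intrinsics (illustrative only; compile with -mavx512f):

    #include <immintrin.h>

    __m512i addForms(__m512i A, __m512i B, __m512i Src, __mmask16 K) {
      __m512i Plain = _mm512_add_epi32(A, B);              // vpaddd
      __m512i Merge = _mm512_mask_add_epi32(Src, K, A, B); // vpaddd ... {k}
      __m512i Zero  = _mm512_maskz_add_epi32(K, A, B);     // vpaddd ... {k}{z}
      return _mm512_xor_si512(Plain, _mm512_xor_si512(Merge, Zero));
    }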
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - SDPatternOperator OpNode> { +multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, + X86VectorVTInfo _, + SDPatternOperator OpNode> { defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", @@ -3772,7 +3740,6 @@ multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231, SDPatternOperator OpNode> { defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix), VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>; - defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix), VTI>, EVEX_CD8<VTI.EltSize, CD8VF>; } @@ -3794,12 +3761,14 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr, v8f64_info, OpNode>, - avx512_fma3_round_forms<opc213, OpcodeStr, - v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W; + avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info, + OpNodeRnd>, EVEX_V512, VEX_W; defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v4f64x_info, OpNode>, EVEX_V256, VEX_W; + v4f64x_info, OpNode>, + EVEX_V256, VEX_W; defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr, - v2f64x_info, OpNode>, EVEX_V128, VEX_W; + v2f64x_info, OpNode>, + EVEX_V128, VEX_W; } } @@ -3830,26 +3799,29 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode, } } // Constraints = "$src1 = $dst" - -multiclass avx512_fma3p_m132_f<bits<8> opc, - string OpcodeStr, - SDNode OpNode> { +multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> { let ExeDomain = SSEPackedSingle in { defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; + OpNode,v16f32_info>, EVEX_V512, + EVEX_CD8<32, CD8VF>; defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; + OpNode, v8f32x_info>, EVEX_V256, + EVEX_CD8<32, CD8VF>; defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps, - OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; + OpNode, v4f32x_info>, EVEX_V128, + EVEX_CD8<32, CD8VF>; } let ExeDomain = SSEPackedDouble in { defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v8f64_info>, EVEX_V512, + VEX_W, EVEX_CD8<32, CD8VF>; defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v4f64x_info>, EVEX_V256, + VEX_W, EVEX_CD8<32, CD8VF>; defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd, - OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>; + OpNode, v2f64x_info>, EVEX_V128, + VEX_W, EVEX_CD8<32, CD8VF>; } } @@ -3860,7 +3832,6 @@ defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>; defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>; defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>; - // Scalar FMA let Constraints = "$src1 = $dst" in { multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3883,7 +3854,6 @@ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpVT (OpNode RC:$src2, RC:$src1, (mem_frag addr:$src3))))]>; } - } // Constraints = "$src1 = $dst" defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X, @@ -3920,6 +3890,7 @@ let hasSideEffects = 0 in { EVEX_4V; } 
// hasSideEffects = 0 } + let Predicates = [HasAVX512] in { defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">, XS, VEX_LIG, EVEX_CD8<32, CD8VT1>; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 78efc4d..5e19ad4 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1216,10 +1216,10 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs), let isCompare = 1 in { let Defs = [EFLAGS] in { let isCommutable = 1 in { - def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>; - def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>; - def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>; - def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>; + def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>; + def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>; + def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>; + def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>; } // isCommutable def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 18bbe5d..45e6d0a 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1232,7 +1232,11 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst), // with implicit zero-extension instead of a 64-bit and if the immediate has at // least 32 bits of leading zeros. If in addition the last 32 bits can be // represented with a sign extension of a 8 bit constant, use that. +// This can also reduce instruction size by eliminating the need for the REX +// prefix. +// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. +let AddedComplexity = 1 in { def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm), (SUBREG_TO_REG (i64 0), @@ -1248,8 +1252,13 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), sub_32bit)>; +} // AddedComplexity = 1 +// AddedComplexity is needed due to the increased complexity on the +// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all +// the MOVZX patterns keeps them together in DAGISel tables. +let AddedComplexity = 1 in { // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; @@ -1272,6 +1281,7 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz +let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), @@ -1290,6 +1300,7 @@ def : Pat<(and GR16:$src1, 0xff), (EXTRACT_SUBREG (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>, Requires<[In64BitMode]>; +} // AddedComplexity = 1 // sext_inreg patterns diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 0bdabdf..b75a9f4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -631,53 +631,53 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 16; return false; }]>; def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 32; return false; }]>; def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedLoadSDNode>(N)) - return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64; + if (auto *Load = dyn_cast<MaskedLoadSDNode>(N)) + return Load->getAlignment() >= 64; return false; }]>; def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_load node:$src1, node:$src2, node:$src3), [{ - return (dyn_cast<MaskedLoadSDNode>(N) != 0); + return isa<MaskedLoadSDNode>(N); }]>; def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 16; return false; }]>; def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 32; return false; }]>; def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - if (dyn_cast<MaskedStoreSDNode>(N)) - return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64; + if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) + return Store->getAlignment() >= 64; return false; }]>; def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_store node:$src1, node:$src2, node:$src3), [{ - return (dyn_cast<MaskedStoreSDNode>(N) != 0); + return isa<MaskedStoreSDNode>(N); }]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 538ec1c..fbfd868 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -559,6 +559,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 }, { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 }, + // 3DNow! 
version of foldable instructions + { X86::PF2IDrr, X86::PF2IDrm, 0 }, + { X86::PF2IWrr, X86::PF2IWrm, 0 }, + { X86::PFRCPrr, X86::PFRCPrm, 0 }, + { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 }, + { X86::PI2FDrr, X86::PI2FDrm, 0 }, + { X86::PI2FWrr, X86::PI2FWrm, 0 }, + { X86::PSWAPDrr, X86::PSWAPDrm, 0 }, + // AVX 128-bit versions of foldable instructions { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, @@ -943,6 +952,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, { X86::CMPSDrr, X86::CMPSDrm, 0 }, { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, + { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, { X86::DIVSDrr, X86::DIVSDrm, 0 }, @@ -1201,6 +1212,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 }, { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 }, + // 3DNow! version of foldable instructions + { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 }, + { X86::PFACCrr, X86::PFACCrm, 0 }, + { X86::PFADDrr, X86::PFADDrm, 0 }, + { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 }, + { X86::PFCMPGErr, X86::PFCMPGErm, 0 }, + { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 }, + { X86::PFMAXrr, X86::PFMAXrm, 0 }, + { X86::PFMINrr, X86::PFMINrm, 0 }, + { X86::PFMULrr, X86::PFMULrm, 0 }, + { X86::PFNACCrr, X86::PFNACCrm, 0 }, + { X86::PFPNACCrr, X86::PFPNACCrm, 0 }, + { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 }, + { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 }, + { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 }, + { X86::PFSUBrr, X86::PFSUBrm, 0 }, + { X86::PFSUBRrr, X86::PFSUBRrm, 0 }, + { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, + // AVX 128-bit versions of foldable instructions { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 }, @@ -5969,6 +5999,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr }, { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr }, { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm }, + { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr }, { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr }, { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm }, { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr }, @@ -5984,6 +6015,7 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr }, { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr }, { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm }, + // TODO: Add the AVX versions of MOVLPSmr { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr }, { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm }, { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ccdbf0e..65b155c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -643,9 +643,6 @@ let Predicates = [UseAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types - def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, - (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; @@ -653,9 +650,6 @@ let Predicates = [UseAVX] in { (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } - 
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -793,7 +787,7 @@ let Predicates = [UseSSE2] in { (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem - // is during lowering, where it's not possible to recognize the fold cause + // is during lowering, where it's not possible to recognize the fold because // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), @@ -3678,13 +3672,30 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), PS, Requires<[HasSSE2]>; } // SchedRW = [WriteStore] +let Predicates = [HasAVX2, NoVLX] in { + def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; + def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst), + (VMOVNTDQYmr addr:$dst, VR256:$src)>; +} + let Predicates = [HasAVX, NoVLX] in { def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), - (VMOVNTPSmr addr:$dst, VR128:$src)>; + (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; + def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>; } def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), - (MOVNTPSmr addr:$dst, VR128:$src)>; + (MOVNTDQmr addr:$dst, VR128:$src)>; +def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; +def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>; } // AddedComplexity @@ -4890,7 +4901,8 @@ let Predicates = [UseAVX] in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIrr GR32:$src)>; - // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. + // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. + // These instructions also write zeros in the high part of a 256-bit register. let AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; @@ -4898,6 +4910,9 @@ let Predicates = [UseAVX] in { (VMOVDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (VMOVDI2PDIrm addr:$src)>; + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>; } // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
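Earlier in this file's diff, the alignednontemporalstore patterns for v4i32/v8i16/v16i8 switch from the FP-domain MOVNTPS to MOVNTDQ (plus the new VMOVNTDQ/VMOVNTDQY patterns), keeping integer streaming stores in the integer domain. The user-level analogue, assuming the usual intrinsic-to-instruction mapping:

    #include <emmintrin.h>

    // An integer nontemporal store; this intrinsic lowers to movntdq,
    // the integer-domain form the new patterns select.
    void streamStore(__m128i *Dst, __m128i V) {
      _mm_stream_si128(Dst, V);
    }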
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, @@ -5016,6 +5031,9 @@ let Predicates = [UseAVX], AddedComplexity = 20 in { (VMOVZQI2PQIrm addr:$src)>; def : Pat<(v2i64 (X86vzload addr:$src)), (VMOVZQI2PQIrm addr:$src)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>; } let Predicates = [UseSSE2], AddedComplexity = 20 in { @@ -7150,6 +7168,10 @@ let Predicates = [HasAVX2] in { } // Patterns +// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or +// on targets where they have equal performance. These were changed to use +// blends because blends have better throughput on SandyBridge and Haswell, but +// movs[s/d] are 1-2 byte shorter instructions. let Predicates = [UseAVX] in { let AddedComplexity = 15 in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a @@ -7166,8 +7188,10 @@ let Predicates = [UseAVX] in { // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>; - def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), - (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; + + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), + (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, @@ -7181,14 +7205,19 @@ let Predicates = [UseAVX] in { (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; - // Move low f64 and clear high bits. - def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), - (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; - + // These will incur an FP/int domain crossing penalty, but it may be the only + // way without AVX2. Do not add any complexity because we may be able to match + // more optimal patterns defined earlier in this file. + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), + (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>; } +// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or +// on targets where they have equal performance. These were changed to use +// blends because blends have better throughput on SandyBridge and Haswell, but +// movs[s/d] are 1-2 byte shorter instructions. let Predicates = [UseSSE41] in { // With SSE41 we can use blends for these patterns. def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), @@ -8341,7 +8370,7 @@ let Predicates = [HasAVX2] in { def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), (VBROADCASTSDYrr VR128:$src)>; - // Provide aliases for broadcast from the same regitser class that + // Provide aliases for broadcast from the same register class that // automatically does the extract. 
def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))), (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 42256b2..28a3b7b 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -334,6 +334,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), + X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), + X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), @@ -358,6 +362,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, + X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK, + X86ISD::PMULUDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), + X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0), X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0), @@ -376,6 +386,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), + X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ca8fc9c..4bfc7f9 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -193,7 +193,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); + Align, isVolatile, false, + DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 
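Both X86SelectionDAGInfo hunks (the memset one above and the memcpy one below), together with the XCore memmove hunk further down, thread one additional boolean through SelectionDAG's memory-operation builders; the XCore site computes it with isInTailCallPosition, so it reads as a tail-call flag. A minimal sketch of the widened memcpy call, assuming only the argument order visible in these hunks (the helper and its name are illustrative, not part of the patch):

// Illustrative only: the new flag slots in between AlwaysInline and the
// MachinePointerInfo arguments; the X86 hunks pass a constant false, the
// XCore hunk passes a computed isTail.
static SDValue emitPlainMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
                               SDValue Dst, SDValue Src, SDValue Size,
                               unsigned Align,
                               MachinePointerInfo DstPtrInfo,
                               MachinePointerInfo SrcPtrInfo) {
  return DAG.getMemcpy(Chain, dl, Dst, Src, Size, Align,
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       /*isTailCall=*/false, DstPtrInfo, SrcPtrInfo);
}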
@@ -282,7 +283,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, + Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 215fe89..36b3b02 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -30,7 +30,7 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { } void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { + StringRef Annot, const MCSubtargetInfo &STI) { printInstruction(MI, O); printAnnotation(O, Annot); } diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h index 78521fd..6fd2dec 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -32,7 +32,8 @@ public: static const char *getRegisterName(unsigned RegNo); void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; private: void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index d0a09b2..4a790c8 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -81,12 +81,11 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCInstPrinter *createXCoreMCInstPrinter(const Target &T, +static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { + const MCRegisterInfo &MRI) { return new XCoreInstPrinter(MAI, MII, MRI); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 6e8a95a..c4e3bb8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -308,7 +308,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty); Ty = Type::getInt32Ty(*DAG.getContext()); Constant *Idx = ConstantInt::get(Ty, Offset); - Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx); + Constant *GAI = ConstantExpr::getGetElementPtr( + Type::getInt8Ty(*DAG.getContext()), GA, Idx); SDValue CP = DAG.getConstantPool(GAI, MVT::i32); return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, MachinePointerInfo(), false, false, false, 0); @@ -1422,7 +1423,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, DAG.getConstant(Size, MVT::i32), - Align, false, false, + Align, false, false, false, MachinePointerInfo(), MachinePointerInfo())); } else { @@ -1833,10 +1834,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, LD->getAlignment() == Alignment && !LD->isVolatile() && !LD->isIndexed() && 
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { + bool isTail = isInTailCallPosition(DAG, ST, Chain); return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, MVT::i32), - Alignment, false, ST->getPointerInfo(), + Alignment, false, isTail, ST->getPointerInfo(), LD->getPointerInfo()); } } diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index b4c6a50..9fb63e9 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -82,8 +82,9 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) { case Instruction::GetElementPtr: { SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end()); ArrayRef<Value *> CEOps(CEOpVec); - return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0], - CEOps.slice(1))); + return dyn_cast<Instruction>(Builder.CreateInBoundsGEP( + cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0], + CEOps.slice(1))); } case Instruction::Add: case Instruction::Sub: @@ -212,7 +213,8 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) { SmallVector<Value *, 2> Indices; Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx))); Indices.push_back(ThreadID); - Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices); + Value *Addr = + Builder.CreateInBoundsGEP(NewGV->getValueType(), NewGV, Indices); U->replaceUsesOfWith(GV, Addr); } diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h index 48bf0fa..3563dbc 100644 --- a/lib/Target/XCore/XCoreTargetStreamer.h +++ b/lib/Target/XCore/XCoreTargetStreamer.h @@ -16,7 +16,7 @@ namespace llvm { class XCoreTargetStreamer : public MCTargetStreamer { public: XCoreTargetStreamer(MCStreamer &S); - virtual ~XCoreTargetStreamer(); + ~XCoreTargetStreamer() override; virtual void emitCCTopData(StringRef Name) = 0; virtual void emitCCTopFunction(StringRef Name) = 0; virtual void emitCCBottomData(StringRef Name) = 0; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 46480bd..56975ea 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -207,6 +207,13 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // Make sure that it is local to this module. if (!F || !F->hasLocalLinkage()) return nullptr; + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; + // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) @@ -227,12 +234,6 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { isSelfRecursive = true; } - // Don't promote arguments for variadic functions. Adding, removing, or - // changing non-pack parameters can change the classification of pack - // parameters. Frontends encode that classification at the call site in the - // IR, while in the callee the classification is determined dynamically based - // on the number of registers consumed so far. 
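The comment being hoisted to the top of PromoteArguments here explains why the pass must bail on variadic functions. A standalone C++ illustration of the hazard, not from the patch (the x86-64 SysV register-passing detail is stated only as background):

#include <cstdarg>

// Illustration only: va_arg's view of the variadic arguments is anchored to
// how many registers the named parameters consumed. If ArgPromotion rewrote
// the named list (say, split 'p' into two loaded ints), the callee's idea of
// where '...' starts would no longer match what callers emitted.
struct Pair { int a, b; };

static int sumRest(Pair *p, int n, ...) {
  va_list ap;
  va_start(ap, n); // classification depends on the named parameter list
  int s = p->a + p->b;
  for (int i = 0; i < n; ++i)
    s += va_arg(ap, int);
  va_end(ap);
  return s;
}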
- if (F->isVarArg()) return nullptr; const DataLayout &DL = F->getParent()->getDataLayout(); // Check to see which arguments are promotable. If an argument is promotable, @@ -674,8 +675,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 - Params.push_back( - GetElementPtrInst::getIndexedType(I->getType(), SI->second)); + Params.push_back(GetElementPtrInst::getIndexedType( + cast<PointerType>(I->getType()->getScalarType())->getElementType(), + SI->second)); assert(Params.back()); } @@ -704,7 +706,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) { DISubprogram SP = DI->second; - SP.replaceFunction(NF); + SP->replaceFunction(NF); // Ensure the map is updated so it can be reused on subsequent argument // promotions of the same function. FunctionDIs.erase(DI); @@ -860,7 +862,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Update the callgraph to know that the callsite has been transformed. CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->replaceCallEdge(Call, New, NF_CGN); + CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4431311..3be23d5 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -73,8 +73,8 @@ namespace { } std::string getDescription() const { - return std::string((IsArg ? "Argument #" : "Return value #")) - + utostr(Idx) + " of function " + F->getName().str(); + return (Twine(IsArg ? "Argument #" : "Return value #") + utostr(Idx) + + " of function " + F->getName()).str(); } }; @@ -304,7 +304,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { auto DI = FunctionDIs.find(&Fn); if (DI != FunctionDIs.end()) { DISubprogram SP = DI->second; - SP.replaceFunction(NF); + SP->replaceFunction(NF); // Ensure the map is updated so it can be reused on non-varargs argument // eliminations of the same function. FunctionDIs.erase(DI); @@ -482,7 +482,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U, return Result; } - if (ImmutableCallSite CS = V) { + if (auto CS = ImmutableCallSite(V)) { const Function *F = CS.getCalledFunction(); if (F) { // Used in a direct call. @@ -1092,7 +1092,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Patch the pointer to LLVM function in debug info descriptor. auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) - DI->second.replaceFunction(NF); + DI->second->replaceFunction(NF); // Now that the old function is dead, delete it. F->eraseFromParent(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 20b41fb..b8c4f5d 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -564,6 +564,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access. Value *NewPtr = NewGlobals[Val]; + Type *NewTy = NewGlobals[Val]->getType(); // Form a shorter GEP if needed. 
if (GEP->getNumOperands() > 3) { @@ -572,7 +573,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Idxs.push_back(NullInt); for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i) Idxs.push_back(CE->getOperand(i)); - NewPtr = ConstantExpr::getGetElementPtr(cast<Constant>(NewPtr), Idxs); + NewPtr = + ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); + NewTy = GetElementPtrInst::getIndexedType(NewTy, Idxs); } else { GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); SmallVector<Value*, 8> Idxs; @@ -721,8 +724,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { else break; if (Idxs.size() == GEPI->getNumOperands()-1) - Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, Idxs)); + Changed |= OptimizeAwayTrappingUsesOfValue( + GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs)); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -2338,7 +2341,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); Constant * const IdxList[] = {IdxZero, IdxZero}; - Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList); + Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) Ptr = ConstantFoldConstantExpression(CE, DL, TLI); @@ -2402,8 +2405,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, i != e; ++i) GEPOps.push_back(getVal(*i)); InstResult = - ConstantExpr::getGetElementPtr(P, GEPOps, - cast<GEPOperator>(GEP)->isInBounds()); + ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, + cast<GEPOperator>(GEP)->isInBounds()); DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index fe00d92..f3f8529 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -349,7 +349,8 @@ void LowerBitSets::allocateByteArrays() { Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs); + Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( + ByteArrayConst->getType(), ByteArray, Idxs); // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the @@ -395,16 +396,17 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, } Constant *ByteArray = BAI->ByteArray; + Type *Ty = BAI->ByteArray->getValueType(); if (!LinkerSubsectionsViaSymbols && AvoidReuse) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. 
- ByteArray = GlobalAlias::create( - BAI->ByteArray->getType()->getElementType(), 0, - GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, + GlobalValue::PrivateLinkage, "bits_use", + ByteArray, M); } - Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset); + Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset); Value *Byte = B.CreateLoad(ByteAddr); Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask); @@ -546,8 +548,8 @@ void LowerBitSets::buildBitSetsFromGlobals( // Multiply by 2 to account for padding elements. Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, I * 2)}; - Constant *CombinedGlobalElemPtr = - ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); + Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( + NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); if (LinkerSubsectionsViaSymbols) { Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); } else { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index d28d563..502451b 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -59,6 +59,10 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> +RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), + cl::desc("Run the float2int (float demotion) pass")); + static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), cl::Hidden, cl::desc("Run the load combining pass")); @@ -307,6 +311,9 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (RunFloat2Int) + MPM.add(createFloat2IntPass()); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 816978e..ad7c5a0 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -305,33 +305,29 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { SmallVector<Metadata *, 64> LiveSubprograms; DenseSet<const MDNode *> VisitedSet; - for (DICompileUnit DIC : F.compile_units()) { - assert(DIC.Verify() && "DIC must verify as a DICompileUnit."); - + for (MDCompileUnit *DIC : F.compile_units()) { // Create our live subprogram list. - DIArray SPs = DIC.getSubprograms(); + MDSubprogramArray SPs = DIC->getSubprograms(); bool SubprogramChange = false; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram DISP(SPs.getElement(i)); - assert(DISP.Verify() && "DISP must verify as a DISubprogram."); + for (unsigned i = 0, e = SPs.size(); i != e; ++i) { + DISubprogram DISP = SPs[i]; // Make sure we visit each subprogram only once. if (!VisitedSet.insert(DISP).second) continue; // If the function referenced by DISP is not null, the function is live. - if (DISP.getFunction()) + if (DISP->getFunction()) LiveSubprograms.push_back(DISP); else SubprogramChange = true; } // Create our live global variable list. 
- DIArray GVs = DIC.getGlobalVariables(); + MDGlobalVariableArray GVs = DIC->getGlobalVariables(); bool GlobalVariableChange = false; - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); - assert(DIG.Verify() && "DIG must verify as DIGlobalVariable."); + for (unsigned i = 0, e = GVs.size(); i != e; ++i) { + DIGlobalVariable DIG = GVs[i]; // Make sure we only visit each global variable only once. if (!VisitedSet.insert(DIG).second) @@ -339,7 +335,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // If the global variable referenced by DIG is not null, the global // variable is live. - if (DIG.getGlobal()) + if (DIG->getVariable()) LiveGlobalVariables.push_back(DIG); else GlobalVariableChange = true; @@ -349,12 +345,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) { // subprogram list/global variable list with our new live subprogram/global // variable list. if (SubprogramChange) { - DIC.replaceSubprograms(DIArray(MDNode::get(C, LiveSubprograms))); + DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms)); Changed = true; } if (GlobalVariableChange) { - DIC.replaceGlobalVariables(DIArray(MDNode::get(C, LiveGlobalVariables))); + DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables)); Changed = true; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 21243c2..56b6cd3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -197,12 +197,51 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +static Value *SimplifyX86insertps(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + VectorType *VecTy = cast<VectorType>(II.getType()); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); + + // The immediate permute control byte looks like this: + // [3:0] - zero mask for each 32-bit lane + // [5:4] - select one 32-bit destination lane + // [7:6] - select one 32-bit source lane + + uint8_t Imm = CInt->getZExtValue(); + uint8_t ZMask = Imm & 0xf; + uint8_t DestLane = (Imm >> 4) & 0x3; + uint8_t SourceLane = (Imm >> 6) & 0x3; + + // If all zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (ZMask == 0xf) + return ZeroVector; + + // TODO: Model this case as two shuffles or a 'logical and' plus shuffle? + if (ZMask) + return nullptr; + + assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); + + // If we're not zeroing anything, this is a single shuffle. + // Replace the selected destination lane with the selected source lane. + // For all other lanes, pass the first source bits through. + int ShuffleMask[4] = { 0, 1, 2, 3 }; + ShuffleMask[DestLane] = SourceLane + 4; + + return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1), + ShuffleMask); + } + return nullptr; +} + /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit /// source vectors, unless a zero bit is set. If a zero bit is set, /// then ignore that half of the mask and clear that half of the vector. 
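The new SimplifyX86insertps above turns a constant-immediate insertps into a plain shufflevector; here is a worked decode of one immediate, using exactly the bit fields the function documents (0x60 is an arbitrary example value):

#include <cstdint>

// Imm = 0x60 = 0b01100000: SourceLane = 1 (bits 7:6), DestLane = 2
// (bits 5:4), ZMask = 0 (bits 3:0). Nothing is zeroed, so this is a single
// shuffle: result lane 2 takes lane 1 of the second operand and the other
// lanes pass the first operand through.
int main() {
  uint8_t Imm = 0x60;
  uint8_t ZMask = Imm & 0xf;              // 0x0 -> no lanes zeroed
  uint8_t DestLane = (Imm >> 4) & 0x3;    // 2
  uint8_t SourceLane = (Imm >> 6) & 0x3;  // 1
  int ShuffleMask[4] = {0, 1, 2, 3};
  ShuffleMask[DestLane] = SourceLane + 4; // {0, 1, 5, 3}
  (void)ZMask;
  return ShuffleMask[2] == 5 ? 0 : 1;
}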
static Value *SimplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { - if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { VectorType *VecTy = cast<VectorType>(II.getType()); ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); @@ -415,112 +454,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; - case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true); - } - // FALL THROUGH uadd into sadd + + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - // Canonicalize constants into the RHS. + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: if (isa<Constant>(II->getArgOperand(0)) && !isa<Constant>(II->getArgOperand(1))) { + // Canonicalize constants into the RHS. Value *LHS = II->getArgOperand(0); II->setArgOperand(0, II->getArgOperand(1)); II->setArgOperand(1, LHS); return II; } + // fall through - // X + undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X + 0 -> {X, false} - if (RHS->isZero()) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - - // We can strength reduce reduce this signed add into a regular add if we - // can prove that it will never overflow. - if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); - } - } - - break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - // undef - X -> undef - // X - undef -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) { - // X - 0 -> {X, false} - if (ConstRHS->isZero()) { - return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { - if (WillNotOverflowSignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); - } - } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); - } - } - break; - } - case Intrinsic::umul_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true); - } // FALL THROUGH - case Intrinsic::smul_with_overflow: - // Canonicalize constants into the RHS. 
- if (isa<Constant>(II->getArgOperand(0)) && - !isa<Constant>(II->getArgOperand(1))) { - Value *LHS = II->getArgOperand(0); - II->setArgOperand(0, II->getArgOperand(1)); - II->setArgOperand(1, LHS); - return II; - } - - // X * undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + OverflowCheckFlavor OCF = + IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID()); + assert(OCF != OCF_INVALID && "unexpected!"); - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X*0 -> {0, false} - if (RHSI->isZero()) - return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + Value *OperationResult = nullptr; + Constant *OverflowResult = nullptr; + if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1), + *II, OperationResult, OverflowResult)) + return CreateOverflowTuple(II, OperationResult, OverflowResult); - // X * 1 -> {X, false} - if (RHSI->equalsInt(1)) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedMul(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); - } - } break; + } + case Intrinsic::minnum: case Intrinsic::maxnum: { Value *Arg0 = II->getArgOperand(0); @@ -806,7 +769,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - + case Intrinsic::x86_sse41_insertps: + if (Value *V = SimplifyX86insertps(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::x86_sse4a_insertqi: { // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top // ones undef diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index fe544c2..bd79a26 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1450,42 +1450,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { CI.setOperand(0, GEP->getOperand(0)); return &CI; } - - // If the GEP has a single use, and the base pointer is a bitcast, and the - // GEP computes a constant offset, see if we can convert these three - // instructions into fewer. This typically happens with unions and other - // non-type-safe code. - unsigned AS = GEP->getPointerAddressSpace(); - unsigned OffsetBits = DL.getPointerSizeInBits(AS); - APInt Offset(OffsetBits, 0); - BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0)); - if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) { - // FIXME: This is insufficiently tested - just a no-crash test - // (test/Transforms/InstCombine/2007-05-14-Crash.ll) - // - // Get the base pointer input of the bitcast, and the type it points to. - Value *OrigBase = BCI->getOperand(0); - SmallVector<Value*, 8> NewIndices; - if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(), - NewIndices)) { - // FIXME: This codepath is completely untested - could be unreachable - // for all I know. - // If we were able to index down into an element, create the GEP - // and bitcast the result. This eliminates one bitcast, potentially - // two. - Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() - ? 
Builder->CreateInBoundsGEP(OrigBase, NewIndices) - : Builder->CreateGEP( - OrigBase->getType()->getPointerElementType(), - OrigBase, NewIndices); - NGEP->takeName(GEP); - - if (isa<BitCastInst>(CI)) - return new BitCastInst(NGEP, CI.getType()); - assert(isa<PtrToIntInst>(CI)); - return new PtrToIntInst(NGEP, CI.getType()); - } - } } return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 803b50a..223bba0 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2109,33 +2109,112 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } -static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, - InstCombiner &IC) { - // Don't bother doing this transformation for pointers, don't do it for - // vectors. - if (!isa<IntegerType>(OrigAddV->getType())) return nullptr; +bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, + Value *RHS, Instruction &OrigI, + Value *&Result, Constant *&Overflow) { + assert((!OrigI.isCommutative() || + !(isa<Constant>(LHS) && !isa<Constant>(RHS))) && + "call with a constant RHS if possible!"); + + auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) { + Result = OpResult; + Overflow = OverflowVal; + if (ReuseName) + Result->takeName(&OrigI); + return true; + }; - // If the add is a constant expr, then we don't bother transforming it. - Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV); - if (!OrigAdd) return nullptr; + switch (OCF) { + case OCF_INVALID: + llvm_unreachable("bad overflow check kind!"); - Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); + case OCF_UNSIGNED_ADD: { + OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); + if (OR == OverflowResult::NeverOverflows) + return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + true); - // Put the new code above the original add, in case there are any uses of the - // add between the add and the compare. - InstCombiner::BuilderTy *Builder = IC.Builder; - Builder->SetInsertPoint(OrigAdd); + if (OR == OverflowResult::AlwaysOverflows) + return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + } + // FALL THROUGH uadd into sadd + case OCF_SIGNED_ADD: { + // X + undef -> undef + if (isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(RHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) + // X + 0 -> {X, false} + if (ConstRHS->isZero()) + return SetResult(LHS, Builder->getFalse(), false); + + // We can strength reduce this signed add into a regular add if we can prove + // that it will never overflow. 
+ if (OCF == OCF_SIGNED_ADD) + if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + true); + } - Module *M = I.getParent()->getParent()->getParent(); - Type *Ty = LHS->getType(); - Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); - CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); - Value *Add = Builder->CreateExtractValue(Call, 0); + case OCF_UNSIGNED_SUB: + case OCF_SIGNED_SUB: { + // undef - X -> undef + // X - undef -> undef + if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(LHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) + // X - 0 -> {X, false} + if (ConstRHS->isZero()) + return SetResult(UndefValue::get(LHS->getType()), Builder->getFalse(), + false); + + if (OCF == OCF_SIGNED_SUB) { + if (WillNotOverflowSignedSub(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + true); + } else { + if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + true); + } + break; + } - IC.ReplaceInstUsesWith(*OrigAdd, Add); + case OCF_UNSIGNED_MUL: { + OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); + if (OR == OverflowResult::NeverOverflows) + return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + true); + if (OR == OverflowResult::AlwaysOverflows) + return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + } // FALL THROUGH + case OCF_SIGNED_MUL: + // X * undef -> undef + if (isa<UndefValue>(RHS)) + return SetResult(UndefValue::get(LHS->getType()), + UndefValue::get(Builder->getInt1Ty()), false); + + if (ConstantInt *RHSI = dyn_cast<ConstantInt>(RHS)) { + // X * 0 -> {0, false} + if (RHSI->isZero()) + return SetResult(Constant::getNullValue(RHS->getType()), + Builder->getFalse(), false); + + // X * 1 -> {X, false} + if (RHSI->equalsInt(1)) + return SetResult(LHS, Builder->getFalse(), false); + } - // The original icmp gets replaced with the overflow value. - return ExtractValueInst::Create(Call, 1, "uadd.overflow"); + if (OCF == OCF_SIGNED_MUL) + if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) + return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + true); + } + + return false; } /// \brief Recognize and process idiom involving test for multiplication @@ -3432,21 +3511,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); } - // (a+b) <u a --> llvm.uadd.with.overflow. - // (a+b) <u b --> llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && - (Op1 == A || Op1 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) - return R; - - // a >u (a+b) --> llvm.uadd.with.overflow. - // b >u (a+b) --> llvm.uadd.with.overflow. 
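The ad-hoc idiom handlers being deleted around here (and the per-intrinsic cases removed from visitCallInst above) are all funneled into one routine keyed by OverflowCheckFlavor. A condensed, not-verbatim sketch of the new dispatch as it reads inside InstCombiner::visitCallInst:

// Map the *.with.overflow intrinsic to a flavor, try the shared
// simplification, and rebuild the {result, overflow} aggregate only when
// something was actually proved.
OverflowCheckFlavor OCF =
    IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
if (OCF != OCF_INVALID) {
  Value *OperationResult = nullptr;
  Constant *OverflowResult = nullptr;
  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
                            *II, OperationResult, OverflowResult))
    return CreateOverflowTuple(II, OperationResult, OverflowResult);
}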
- if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op1, m_Add(m_Value(A), m_Value(B))) && - (Op0 == A || Op0 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) - return R; + Instruction *AddI = nullptr; + if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B), + m_Instruction(AddI))) && + isa<IntegerType>(A->getType())) { + Value *Result; + Constant *Overflow; + if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result, + Overflow)) { + ReplaceInstUsesWith(*AddI, Result); + return ReplaceInstUsesWith(I, Overflow); + } + } // (zext a) * (zext b) --> llvm.umul.with.overflow. if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index fb2321d..d0caf34 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -110,6 +110,41 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { return false; } + +/// \brief Specific patterns of overflow check idioms that we match. +enum OverflowCheckFlavor { + OCF_UNSIGNED_ADD, + OCF_SIGNED_ADD, + OCF_UNSIGNED_SUB, + OCF_SIGNED_SUB, + OCF_UNSIGNED_MUL, + OCF_SIGNED_MUL, + + OCF_INVALID +}; + +/// \brief Returns the OverflowCheckFlavor corresponding to a overflow_with_op +/// intrinsic. +static inline OverflowCheckFlavor +IntrinsicIDToOverflowCheckFlavor(unsigned ID) { + switch (ID) { + default: + return OCF_INVALID; + case Intrinsic::uadd_with_overflow: + return OCF_UNSIGNED_ADD; + case Intrinsic::sadd_with_overflow: + return OCF_SIGNED_ADD; + case Intrinsic::usub_with_overflow: + return OCF_UNSIGNED_SUB; + case Intrinsic::ssub_with_overflow: + return OCF_SIGNED_SUB; + case Intrinsic::umul_with_overflow: + return OCF_UNSIGNED_MUL; + case Intrinsic::smul_with_overflow: + return OCF_SIGNED_MUL; + } +} + /// \brief An IRBuilder inserter that adds new instructions to the instcombine /// worklist. class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter @@ -316,7 +351,7 @@ private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; - Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, + Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); @@ -329,6 +364,17 @@ private: bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, Type *Ty); + /// \brief Try to optimize a sequence of instructions checking if an operation + /// on LHS and RHS overflows. + /// + /// If a simplification is possible, stores the simplified result of the + /// operation in OperationResult and result of the overflow check in + /// OverflowResult, and return true. If no simplification is possible, + /// returns false. + bool OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, Value *RHS, + Instruction &CtxI, Value *&OperationResult, + Constant *&OverflowResult); + Instruction *visitCallSite(CallSite CS); Instruction *tryOptimizeCall(CallInst *CI); bool transformConstExprCastCall(CallSite CS); @@ -391,14 +437,10 @@ public: } /// Creates a result tuple for an overflow intrinsic \p II with a given - /// \p Result and a constant \p Overflow value. If \p ReUseName is true the - /// \p Result's name is taken from \p II. + /// \p Result and a constant \p Overflow value. 
Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result, - bool Overflow, bool ReUseName = true) { - if (ReUseName) - Result->takeName(II); - Constant *V[] = {UndefValue::get(Result->getType()), - Overflow ? Builder->getTrue() : Builder->getFalse()}; + Constant *Overflow) { + Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; StructType *ST = cast<StructType>(II->getType()); Constant *Struct = ConstantStruct::get(ST, V); return InsertValueInst::Create(Struct, Result, 0); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6b0f268..d8a559c 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -84,7 +84,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } - if (CallSite CS = I) { + if (auto CS = CallSite(I)) { // If this is the function being called then we treat it like a load and // ignore it. if (CS.isCallee(&U)) @@ -611,8 +611,10 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, return false; SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx); - Type *AllocTy = - GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops); + Type *AllocTy = GetElementPtrInst::getIndexedType( + cast<PointerType>(GEPI->getOperand(0)->getType()->getScalarType()) + ->getElementType(), + Ops); if (!AllocTy || !AllocTy->isSized()) return false; const DataLayout &DL = IC.getDataLayout(); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b6beb65..24446c8 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -987,8 +987,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { unsigned BegIdx = Mask.front(); VectorType *SrcTy = cast<VectorType>(V->getType()); unsigned VecBitWidth = SrcTy->getBitWidth(); - unsigned SrcElemBitWidth = - SrcTy->getElementType()->getPrimitiveSizeInBits(); + unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); assert(SrcElemBitWidth && "vector elements must have a bitwidth"); unsigned SrcNumElems = SrcTy->getNumElements(); SmallVector<BitCastInst *, 8> BCs; @@ -1000,7 +999,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { BCs.push_back(BC); for (BitCastInst *BC : BCs) { Type *TgtTy = BC->getDestTy(); - unsigned TgtElemBitWidth = TgtTy->getPrimitiveSizeInBits(); + unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); if (!TgtElemBitWidth) continue; unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 90551e4..3b46156 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -869,11 +869,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// whether or not there is a sequence of GEP indices into the pointed type that /// will land us at the specified offset. If so, fill them into NewIndices and /// return the resultant element type, otherwise return null. 
-Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl<Value *> &NewIndices) { - assert(PtrTy->isPtrOrPtrVectorTy()); - - Type *Ty = PtrTy->getPointerElementType(); + Type *Ty = PtrTy->getElementType(); if (!Ty->isSized()) return nullptr; @@ -1611,12 +1609,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // %0 = GEP [10 x i8] addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); - Value *NewGEP = - GEP.isInBounds() - ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, Idx, GEP.getName()); + Value *NewGEP = GEP.isInBounds() + ? Builder->CreateInBoundsGEP( + nullptr, StrippedPtr, Idx, GEP.getName()) + : Builder->CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1634,9 +1631,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, Idx, GEP.getName()); + ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1669,10 +1666,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, + ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, GEP.getName()) - : Builder->CreateGEP(StrippedPtrTy->getElementType(), - StrippedPtr, NewIdx, GEP.getName()); + : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1710,9 +1707,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Constant::getNullValue(DL.getIntPtrType(GEP.getType())), NewIdx}; - Value *NewGEP = GEP.isInBounds() && NSW ? - Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) : - Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName()); + Value *NewGEP = GEP.isInBounds() && NSW + ? Builder->CreateInBoundsGEP( + SrcElTy, StrippedPtr, Off, GEP.getName()) + : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1774,9 +1773,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP. SmallVector<Value*, 8> NewIndices; if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { - Value *NGEP = GEP.isInBounds() ? - Builder->CreateInBoundsGEP(Operand, NewIndices) : - Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices); + Value *NGEP = + GEP.isInBounds() + ? 
Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder->CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); @@ -2268,7 +2268,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. Builder->SetInsertPoint(L->getParent(), L); - Value *GEP = Builder->CreateInBoundsGEP(L->getPointerOperand(), Indices); + Value *GEP = Builder->CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in // the wrong spot, so use ReplaceInstUsesWith(). return ReplaceInstUsesWith(EV, Builder->CreateLoad(GEP)); @@ -2725,7 +2726,7 @@ bool InstCombiner::run() { DEBUG(dbgs() << "IC: Old = " << *I << '\n' << " New = " << *Result << '\n'); - if (!I->getDebugLoc().isUnknown()) + if (I->getDebugLoc()) Result->setDebugLoc(I->getDebugLoc()); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 978c857..8d6d3ce 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -401,12 +401,12 @@ struct AddressSanitizer : public FunctionPass { return SizeInBytes; } /// Check if we want (and can) handle this alloca. - bool isInterestingAlloca(AllocaInst &AI) const; + bool isInterestingAlloca(AllocaInst &AI); /// If it is an interesting memory access, return the PointerOperand /// and set IsWrite/Alignment. Otherwise return nullptr. Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, - unsigned *Alignment) const; + unsigned *Alignment); void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); @@ -458,6 +458,7 @@ struct AddressSanitizer : public FunctionPass { Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; GlobalsMetadata GlobalsMD; + DenseMap<AllocaInst *, bool> ProcessedAllocas; friend struct FunctionStackPoisoner; }; @@ -804,13 +805,21 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { } /// Check if we want (and can) handle this alloca. -bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { - return (AI.getAllocatedType()->isSized() && - // alloca() may be called with 0 size, ignore it. - getAllocaSizeInBytes(&AI) > 0 && - // We are only interested in allocas not promotable to registers. - // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); +bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { + auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI); + + if (PreviouslySeenAllocaInfo != ProcessedAllocas.end()) + return PreviouslySeenAllocaInfo->getSecond(); + + bool IsInteresting = (AI.getAllocatedType()->isSized() && + // alloca() may be called with 0 size, ignore it. + getAllocaSizeInBytes(&AI) > 0 && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. 
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI))); + + ProcessedAllocas[&AI] = IsInteresting; + return IsInteresting; } /// If I is an interesting memory access, return the PointerOperand @@ -818,7 +827,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const { Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, - unsigned *Alignment) const { + unsigned *Alignment) { // Skip memory accesses inserted by another instrumentation. if (I->getMetadata("nosanitize")) return nullptr; @@ -959,18 +968,6 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, UseCalls, Exp); } -// Validate the result of Module::getOrInsertFunction called for an interface -// function of AddressSanitizer. If the instrumented module defines a function -// with the same name, their prototypes must match, otherwise -// getOrInsertFunction returns a bitcast. -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast); - FuncOrBitcast->dump(); - report_fatal_error( - "trying to redefine an AddressSanitizer " - "interface function"); -} - Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex, @@ -1056,7 +1053,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, // path is rarely taken. This seems to be the case for SPEC benchmarks. TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen( Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000)); - assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); + assert(cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); @@ -1219,17 +1216,17 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. - AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); - AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr)); AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); // Declare functions that register/unregister globals. 
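For context on the checkInterfaceFunction removal above and the checkSanitizerInterfaceFunction calls that replace it below: Module::getOrInsertFunction returns the Function itself when the requested prototype matches, and a bitcast constant when a same-named function with a different type already exists, which is exactly what the check guards against. A minimal restatement of the deleted helper's logic (the function name here is illustrative; the shared replacement presumably lives in a common sanitizer header that this part of the diff does not show):

// Accept a real Function; anything else means getOrInsertFunction returned a
// bitcast because a same-named symbol has a mismatched prototype.
static Function *requireInterfaceFunction(Constant *FuncOrBitcast) {
  if (auto *F = dyn_cast<Function>(FuncOrBitcast))
    return F;
  report_fatal_error("trying to redefine a sanitizer interface function");
}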
- AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( + AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); - AsanUnregisterGlobals = checkInterfaceFunction( + AsanUnregisterGlobals = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); @@ -1317,7 +1314,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Indices2[1] = IRB.getInt32(0); G->replaceAllUsesWith( - ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true)); + ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true)); NewGlobal->takeName(G); G->eraseFromParent(); @@ -1399,44 +1396,44 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string ExpStr = Exp ? "exp_" : ""; const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; AsanErrorCallbackSized[AccessIsWrite][Exp] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + TypeStr + "_n", IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N", IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr)); for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; AccessSizeIndex++) { const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex); AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] = - checkInterfaceFunction(M.getOrInsertFunction( + checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(), IntptrTy, ExpType, nullptr)); } } } - AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); - AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr)); - AsanMemset = checkInterfaceFunction(M.getOrInsertFunction( + AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr)); - AsanHandleNoReturnFunc = checkInterfaceFunction( + AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr)); - AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( + AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanPtrSubFunction = checkInterfaceFunction(M.getOrInsertFunction( + AsanPtrSubFunction = 
checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); // We insert an empty inline asm after __asan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -1461,7 +1458,7 @@ bool AddressSanitizer::doInitialization(Module &M) { BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction); // call __asan_init in the module ctor. IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB)); - AsanInitFunction = checkInterfaceFunction( + AsanInitFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), nullptr)); AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); @@ -1613,16 +1610,17 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); for (int i = 0; i <= kMaxAsanStackMallocSizeClass; i++) { std::string Suffix = itostr(i); - AsanStackMallocFunc[i] = checkInterfaceFunction(M.getOrInsertFunction( - kAsanStackMallocNameTemplate + Suffix, IntptrTy, IntptrTy, nullptr)); - AsanStackFreeFunc[i] = checkInterfaceFunction( + AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy, + IntptrTy, nullptr)); + AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } - AsanPoisonStackMemoryFunc = checkInterfaceFunction( + AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); - AsanUnpoisonStackMemoryFunc = checkInterfaceFunction( + AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr)); } @@ -1757,9 +1755,11 @@ void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; - // Don't do dynamic alloca in presence of inline asm: too often it - // makes assumptions on which registers are available. + // Don't do dynamic alloca in presence of inline asm: too often it makes + // assumptions on which registers are available. Don't do stack malloc in the + // presence of inline asm on 32-bit platforms for the same reason. bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; + DoStackMalloc &= !HasNonEmptyInlineAsm || ASan.LongSize != 32; Value *StaticAlloca = DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index b3925ee..06d5aed 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -753,7 +753,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // Patch the pointer to LLVM function in debug info descriptor. 
auto DI = FunctionDIs.find(&F); if (DI != FunctionDIs.end()) - DI->second.replaceFunction(&F); + DI->second->replaceFunction(&F); UnwrappedFnMap[WrappedFnCst] = &F; *i = NewF; @@ -1075,8 +1075,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, } case 2: { IRBuilder<> IRB(Pos); - Value *ShadowAddr1 = - IRB.CreateGEP(ShadowAddr, ConstantInt::get(DFS.IntptrTy, 1)); + Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr, + ConstantInt::get(DFS.IntptrTy, 1)); return combineShadows(IRB.CreateAlignedLoad(ShadowAddr, ShadowAlign), IRB.CreateAlignedLoad(ShadowAddr1, ShadowAlign), Pos); } @@ -1127,7 +1127,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F); DT.addNewBlock(NextBB, LastBr->getParent()); IRBuilder<> NextIRB(NextBB); - WideAddr = NextIRB.CreateGEP(WideAddr, ConstantInt::get(DFS.IntptrTy, 1)); + WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr, + ConstantInt::get(DFS.IntptrTy, 1)); Value *NextWideShadow = NextIRB.CreateAlignedLoad(WideAddr, ShadowAlign); ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow); LastBr->setSuccessor(0, NextBB); @@ -1213,7 +1214,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *ShadowVecAddr = IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy)); do { - Value *CurShadowVecAddr = IRB.CreateConstGEP1_32(ShadowVecAddr, Offset); + Value *CurShadowVecAddr = + IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset); IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign); Size -= ShadowVecSize; ++Offset; @@ -1221,7 +1223,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Offset *= ShadowVecSize; } while (Size > 0) { - Value *CurShadowAddr = IRB.CreateConstGEP1_32(ShadowAddr, Offset); + Value *CurShadowAddr = + IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset); IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign); --Size; ++Offset; @@ -1469,17 +1472,17 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Args.push_back(DFSF.getShadow(*i)); if (FT->isVarArg()) { - auto LabelVAAlloca = - new AllocaInst(ArrayType::get(DFSF.DFS.ShadowTy, - CS.arg_size() - FT->getNumParams()), - "labelva", DFSF.F->getEntryBlock().begin()); + auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, + CS.arg_size() - FT->getNumParams()); + auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", + DFSF.F->getEntryBlock().begin()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { - auto LabelVAPtr = IRB.CreateStructGEP(LabelVAAlloca, n); + auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr); } - Args.push_back(IRB.CreateStructGEP(LabelVAAlloca, 0)); + Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0)); } if (!FT->getReturnType()->isVoidTy()) { @@ -1565,10 +1568,11 @@ void DFSanVisitor::visitCallSite(CallSite CS) { ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); - Args.push_back(IRB.CreateConstGEP2_32(VarArgShadow, 0, 0)); + Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { - IRB.CreateStore(DFSF.getShadow(*i), - IRB.CreateConstGEP2_32(VarArgShadow, 0, n)); + IRB.CreateStore( + DFSF.getShadow(*i), + IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n)); Args.push_back(*i); } } 
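[Editor's note] The CreateGEP/CreateConstGEP1_32/CreateStructGEP churn above is one instance of an API revision the whole rebase adapts to: IRBuilder's GEP factories now take the pointee type as an explicit leading argument, groundwork for decoupling GEPs from pointer element types. The same pattern recurs in GCOVProfiler, MemorySanitizer, GVN, and IndVarSimplify below. A sketch of the shape change, assuming the signatures the hunks themselves use; the helper is illustrative:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Step one shadow slot past ShadowAddr, as DFSan's loadShadow does above.
    static Value *nextShadowSlot(IRBuilder<> &IRB, Type *ShadowTy,
                                 Value *ShadowAddr, Type *IntptrTy) {
      // Old: IRB.CreateGEP(ShadowAddr, ConstantInt::get(IntptrTy, 1));
      // New: the element type is spelled out; passing nullptr still means
      // "infer it from the pointer operand" during the transition.
      return IRB.CreateGEP(ShadowTy, ShadowAddr, ConstantInt::get(IntptrTy, 1));
    }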
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a793e69..368a81d 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -149,10 +149,10 @@ ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { return new GCOVProfiler(Options); } -static StringRef getFunctionName(DISubprogram SP) { - if (!SP.getLinkageName().empty()) - return SP.getLinkageName(); - return SP.getName(); +static StringRef getFunctionName(MDSubprogram *SP) { + if (!SP->getLinkageName().empty()) + return SP->getLinkageName(); + return SP->getName(); } namespace { @@ -163,7 +163,7 @@ namespace { static const char *const BlockTag; static const char *const EdgeTag; - GCOVRecord() {} + GCOVRecord() = default; void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); @@ -315,7 +315,7 @@ namespace { ReturnBlock(1, os) { this->os = os; - Function *F = SP.getFunction(); + Function *F = SP->getFunction(); DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; @@ -330,7 +330,7 @@ namespace { std::string FunctionNameAndLine; raw_string_ostream FNLOS(FunctionNameAndLine); - FNLOS << getFunctionName(SP) << SP.getLineNumber(); + FNLOS << getFunctionName(SP) << SP->getLine(); FNLOS.flush(); FuncChecksum = hash_value(FunctionNameAndLine); } @@ -366,7 +366,7 @@ namespace { void writeOut() { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + - 1 + lengthOfGCOVString(SP.getFilename()) + 1; + 1 + lengthOfGCOVString(SP->getFilename()) + 1; if (UseCfgChecksum) ++BlockLen; write(BlockLen); @@ -375,8 +375,8 @@ namespace { if (UseCfgChecksum) write(CfgChecksum); writeGCOVString(getFunctionName(SP)); - writeGCOVString(SP.getFilename()); - write(SP.getLineNumber()); + writeGCOVString(SP->getFilename()); + write(SP->getLine()); // Emit count of blocks. writeBytes(BlockTag, 4); @@ -437,12 +437,12 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { } } - SmallString<128> Filename = CU.getFilename(); + SmallString<128> Filename = CU->getFilename(); sys::path::replace_extension(Filename, NewStem); StringRef FName = sys::path::filename(Filename); SmallString<128> CurPath; if (sys::fs::current_path(CurPath)) return FName; - sys::path::append(CurPath, FName.str()); + sys::path::append(CurPath, FName); return CurPath.str(); } @@ -466,7 +466,8 @@ static bool functionHasLines(Function *F) { if (isa<DbgInfoIntrinsic>(I)) continue; const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; + if (!Loc) + continue; // Artificial lines such as calls to the global constructors. if (Loc.getLine() == 0) continue; @@ -486,21 +487,14 @@ void GCOVProfiler::emitProfileNotes() { // this pass over the original .o's as they're produced, or run it after // LTO, we'll generate the same .gcno files. 
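[Editor's note] functionHasLines above also picks up this rebase's DebugLoc change: DebugLoc now wraps a location node and converts to bool, so "no location" is spelled !Loc rather than Loc.isUnknown() (SanitizerCoverage below makes the same switch). A small sketch of the idiom; the helper name is made up:

    #include "llvm/IR/DebugLoc.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // True when I carries a real source line; line 0 marks compiler-generated
    // code such as calls to global constructors.
    static bool hasRealLine(const Instruction &I) {
      const DebugLoc &Loc = I.getDebugLoc();
      if (!Loc) // previously: Loc.isUnknown()
        return false;
      return Loc.getLine() != 0;
    }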
- DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); std::error_code EC; raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None); std::string EdgeDestinations; - DIArray SPs = CU.getSubprograms(); unsigned FunctionIdent = 0; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - - Function *F = SP.getFunction(); + for (auto *SP : CU->getSubprograms()) { + Function *F = SP->getFunction(); if (!F) continue; if (!functionHasLines(F)) continue; @@ -536,16 +530,18 @@ void GCOVProfiler::emitProfileNotes() { if (isa<DbgInfoIntrinsic>(I)) continue; const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; + if (!Loc) + continue; // Artificial lines such as calls to the global constructors. if (Loc.getLine() == 0) continue; if (Line == Loc.getLine()) continue; Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + if (SP != getDISubprogram(Loc.getScope())) + continue; - GCOVLines &Lines = Block.getFile(SP.getFilename()); + GCOVLines &Lines = Block.getFile(SP->getFilename()); Lines.addLine(Loc.getLine()); } } @@ -574,16 +570,10 @@ bool GCOVProfiler::emitProfileArcs() { bool Result = false; bool InsertIndCounterIncrCode = false; for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); - DIArray SPs = CU.getSubprograms(); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP; - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - Function *F = SP.getFunction(); + for (auto *SP : CU->getSubprograms()) { + Function *F = SP->getFunction(); if (!F) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; @@ -603,7 +593,7 @@ bool GCOVProfiler::emitProfileArcs() { GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), "__llvm_gcov_ctr"); - CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); + CountersBySP.push_back(std::make_pair(Counters, SP)); UniqueVector<BasicBlock *> ComplexEdgePreds; UniqueVector<BasicBlock *> ComplexEdgeSuccs; @@ -628,7 +618,8 @@ bool GCOVProfiler::emitProfileArcs() { SmallVector<Value *, 2> Idx; Idx.push_back(Builder.getInt64(0)); Idx.push_back(Sel); - Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); + Value *Counter = Builder.CreateInBoundsGEP(Counters->getValueType(), + Counters, Idx); Value *Count = Builder.CreateLoad(Counter); Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); @@ -855,7 +846,7 @@ Function *GCOVProfiler::insertCounterWriteout( NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (CU_Nodes) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); + DICompileUnit CU = cast<MDCompileUnit>(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); uint32_t CfgChecksum = FileChecksums.empty() ? 
0 : FileChecksums[i]; Builder.CreateCall3(StartFile, @@ -863,7 +854,7 @@ Function *GCOVProfiler::insertCounterWriteout( Builder.CreateGlobalStringPtr(ReversedVersion), Builder.getInt32(CfgChecksum)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { - DISubprogram SP(CountersBySP[j].second); + auto *SP = cast_or_null<MDSubprogram>(CountersBySP[j].second); uint32_t FuncChecksum = Funcs.empty() ? 0 : Funcs[j]->getFuncChecksum(); Builder.CreateCall5( EmitFunction, Builder.getInt32(j), @@ -922,7 +913,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); Arg = std::next(Fn->arg_begin()); Arg->setName("counters"); - Value *GEP = Builder.CreateGEP(Arg, ZExtPred); + Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); Cond = Builder.CreateICmpEQ(Counter, Constant::getNullValue( diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c2aa1e2..2b35066 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -623,8 +623,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *IntptrOriginPtr = IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0)); for (unsigned i = 0; i < Size / IntptrSize; ++i) { - Value *Ptr = - i ? IRB.CreateConstGEP1_32(IntptrOriginPtr, i) : IntptrOriginPtr; + Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i) + : IntptrOriginPtr; IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment); Ofs += IntptrSize / kOriginSize; CurrentAlignment = IntptrAlignment; @@ -632,7 +632,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) { - Value *GEP = i ? IRB.CreateConstGEP1_32(OriginPtr, i) : OriginPtr; + Value *GEP = + i ? 
IRB.CreateConstGEP1_32(nullptr, OriginPtr, i) : OriginPtr; IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment); CurrentAlignment = kMinOriginAlignment; } @@ -2843,7 +2844,8 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr); Value *OverflowArgAreaShadowPtr = MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); - Value *SrcPtr = IRB.CreateConstGEP1_32(VAArgTLSCopy, AMD64FpEndOffset); + Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, + AMD64FpEndOffset); IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); } } diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 289675e..662513d 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -140,16 +140,6 @@ class SanitizerCoverageModule : public ModulePass { } // namespace -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (Function *F = dyn_cast<Function>(FuncOrBitcast)) - return F; - std::string Err; - raw_string_ostream Stream(Err); - Stream << "SanitizerCoverage interface function redefined: " - << *FuncOrBitcast; - report_fatal_error(Err); -} - bool SanitizerCoverageModule::runOnModule(Module &M) { if (!CoverageLevel) return false; C = &(M.getContext()); @@ -167,16 +157,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { ReturnInst::Create(*C, BasicBlock::Create(*C, "", CtorFunc)); appendToGlobalCtors(M, CtorFunc, kSanCtorAndDtorPriority); - SanCovFunction = checkInterfaceFunction( + SanCovFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovName, VoidTy, Int32PtrTy, nullptr)); - SanCovWithCheckFunction = checkInterfaceFunction( + SanCovWithCheckFunction = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr)); - SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction( - kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); - SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction( - kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); - - SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction( + SanCovIndirCallFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr)); + SanCovTraceCmpFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + + SanCovModuleInit = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy, nullptr)); SanCovModuleInit->setLinkage(Function::ExternalLinkage); @@ -186,9 +178,9 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { /*hasSideEffects=*/true); if (ClExperimentalTracing) { - SanCovTraceEnter = checkInterfaceFunction( + SanCovTraceEnter = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkInterfaceFunction( + SanCovTraceBB = checkSanitizerInterfaceFunction( M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); } @@ -316,7 +308,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); - if (dyn_cast<InlineAsm>(Callee)) continue; + if (isa<InlineAsm>(Callee)) continue; GlobalVariable 
*CalleeCache = new GlobalVariable( *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); @@ -366,8 +358,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } bool IsEntryBB = &BB == &F.getEntryBlock(); - DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown() - ? IP->getDebugLoc().getFnDebugLoc(*C) + DebugLoc EntryLoc = IsEntryBB && IP->getDebugLoc() + ? IP->getDebugLoc().getFnDebugLoc() : IP->getDebugLoc(); IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index c3ba722..aa8ee5a 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -129,54 +129,48 @@ FunctionPass *llvm::createThreadSanitizerPass() { return new ThreadSanitizer(); } -static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { - if (Function *F = dyn_cast<Function>(FuncOrBitcast)) - return F; - FuncOrBitcast->dump(); - report_fatal_error("ThreadSanitizer interface function redefined"); -} - void ThreadSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(M.getContext()); // Initialize the callbacks. - TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( + TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction( - "__tsan_func_exit", IRB.getVoidTy(), nullptr)); + TsanFuncExit = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { const size_t ByteSize = 1 << i; const size_t BitSize = ByteSize * 8; SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); - TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); - TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedReadName("__tsan_unaligned_read" + itostr(ByteSize)); - TsanUnalignedRead[i] = checkInterfaceFunction(M.getOrInsertFunction( - UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + TsanUnalignedRead[i] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + itostr(ByteSize)); - TsanUnalignedWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( - UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); + TsanUnalignedWrite[i] = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + "_load"); - TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction( - AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); + TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); SmallString<32> 
AtomicStoreName("__tsan_atomic" + itostr(BitSize) + "_store"); - TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( - AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, - nullptr)); + TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( + AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { @@ -199,34 +193,34 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { else continue; SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart); - TsanAtomicRMW[op][i] = checkInterfaceFunction(M.getOrInsertFunction( - RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); + TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); } SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + "_compare_exchange_val"); - TsanAtomicCAS[i] = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); } - TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( - "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), nullptr)); - TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( + TsanVptrUpdate = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr)); + TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr)); - TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( + TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr)); - MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( - "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IntptrTy, nullptr)); - MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction( - "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - IntptrTy, nullptr)); - MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( - "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), - IntptrTy, nullptr)); + MemmoveFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemcpyFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, nullptr)); + MemsetFn = checkSanitizerInterfaceFunction( + M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt32Ty(), IntptrTy, nullptr)); } bool ThreadSanitizer::doInitialization(Module &M) { diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 87de33b..d4fef10 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -54,8 +54,6 @@ public: RetainAutorelease(nullptr), RetainAutoreleaseRV(nullptr) { } - ~ARCRuntimeEntryPoints() { } - void init(Module *M) { TheModule = M; AutoreleaseRV = nullptr; diff --git 
a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index b197c97..4edd029 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -45,7 +45,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, default: break; } - ImmutableCallSite CS = static_cast<const Value *>(Inst); + ImmutableCallSite CS(Inst); assert(CS && "Only calls can alter reference counts!"); // See if AliasAnalysis can help us with the call. @@ -99,7 +99,7 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr, // of any other dynamic reference-counted pointers. if (!IsPotentialRetainableObjPtr(ICI->getOperand(1), *PA.getAA())) return false; - } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { + } else if (auto CS = ImmutableCallSite(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5aa2b97..8918909 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -50,9 +50,9 @@ struct AlignmentFromAssumptions : public FunctionPass { initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry()); } - bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<ScalarEvolution>(); AU.addRequired<DominatorTreeWrapperPass>(); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index cf30f39..16f2ead 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -11,6 +11,7 @@ transforms_scalar_SRC_FILES := \ DeadStoreElimination.cpp \ EarlyCSE.cpp \ FlattenCFGPass.cpp \ + Float2Int.cpp \ GVN.cpp \ IndVarSimplify.cpp \ InductiveRangeCheckElimination.cpp \ @@ -30,6 +31,7 @@ transforms_scalar_SRC_FILES := \ LowerExpectIntrinsic.cpp \ MemCpyOptimizer.cpp \ MergedLoadStoreMotion.cpp \ + NaryReassociate.cpp \ PartiallyInlineLibCalls.cpp \ PlaceSafepoints.cpp \ Reassociate.cpp \ diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d12fdb7..c29fcc3 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts DeadStoreElimination.cpp EarlyCSE.cpp FlattenCFGPass.cpp + Float2Int.cpp GVN.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp @@ -28,6 +29,7 @@ add_llvm_library(LLVMScalarOpts LowerExpectIntrinsic.cpp MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp + NaryReassociate.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp Reassociate.cpp diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index cb8981b..01952cf 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -168,7 +168,7 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { return true; } } - if (CallSite CS = I) { + if (auto CS = CallSite(I)) { if (Function *F = CS.getCalledFunction()) { if (TLI && TLI->has(LibFunc::strcpy) && F->getName() == TLI->getName(LibFunc::strcpy)) { @@ -262,7 +262,7 @@ 
static bool isRemovable(Instruction *I) {
    }
  }

-  if (CallSite CS = I)
+  if (auto CS = CallSite(I))
    return CS.getInstruction()->use_empty();

  return false;
@@ -306,7 +306,7 @@ static Value *getStoredPointerOperand(Instruction *I) {
    }
  }

-  CallSite CS = I;
+  CallSite CS(I);
  // All the supported functions so far happen to have dest as their first
  // argument.
  return CS.getArgument(0);
@@ -780,7 +780,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
      continue;
    }

-    if (CallSite CS = cast<Value>(BBI)) {
+    if (auto CS = CallSite(BBI)) {
      // Remove allocation function calls from the list of dead stack objects;
      // there can't be any references before the definition.
      if (isAllocLikeFn(BBI, TLI))
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
new file mode 100644
index 0000000..c931422
--- /dev/null
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -0,0 +1,540 @@
+//===- Float2Int.cpp - Demote floating point ops to work on integers ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Float2Int pass, which aims to demote floating
+// point operations to work on integers, where that is losslessly possible.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "float2int"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <deque>
+#include <functional> // For std::function
+using namespace llvm;
+
+// The algorithm is simple. Start at instructions that convert from the
+// float to the int domain: fptoui, fptosi and fcmp. Walk up the def-use
+// graph, using an equivalence data structure to unify graphs that interfere.
+//
+// Mappable instructions are those with an integer corollary that, given
+// integer domain inputs, produce an integer output; fadd, for example.
+//
+// If a non-mappable instruction is seen, this entire def-use graph is marked
+// as non-transformable. If we see an instruction that converts from the
+// integer domain to FP domain (uitofp, sitofp), we terminate our walk.

+/// The largest integer type worth dealing with.
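// [Editor's illustration, not part of the patch] Concretely, assuming the
// default 64-bit budget declared just below, the pass aims to turn integer
// values that round-trip through 'float',
//
//   %af = sitofp i16 %a to float
//   %bf = sitofp i16 %b to float
//   %sf = fadd float %af, %bf
//   %s  = fptosi float %sf to i32
//
// into a pure integer computation (an i16 sum needs at most 17 bits):
//
//   %ae = sext i16 %a to i32
//   %be = sext i16 %b to i32
//   %s  = add i32 %ae, %be
//
// (The cl::opt below caps this bitwidth budget.)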
+static cl::opt<unsigned> +MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden, + cl::desc("Max integer bitwidth to consider in float2int" + "(default=64)")); + +namespace { + struct Float2Int : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + Float2Int() : FunctionPass(ID) { + initializeFloat2IntPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + } + + void findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots); + ConstantRange seen(Instruction *I, ConstantRange R); + ConstantRange badRange(); + ConstantRange unknownRange(); + ConstantRange validateRange(ConstantRange R); + void walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots); + void walkForwards(); + bool validateAndTransform(); + Value *convert(Instruction *I, Type *ToTy); + void cleanup(); + + MapVector<Instruction*, ConstantRange > SeenInsts; + SmallPtrSet<Instruction*,8> Roots; + EquivalenceClasses<Instruction*> ECs; + MapVector<Instruction*, Value*> ConvertedInsts; + LLVMContext *Ctx; + }; +} + +char Float2Int::ID = 0; +INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) + +// Given a FCmp predicate, return a matching ICmp predicate if one +// exists, otherwise return BAD_ICMP_PREDICATE. +static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) { + switch (P) { + case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: + return CmpInst::ICMP_EQ; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + return CmpInst::ICMP_SGT; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: + return CmpInst::ICMP_SGE; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + return CmpInst::ICMP_SLT; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: + return CmpInst::ICMP_SLE; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: + return CmpInst::ICMP_NE; + default: + return CmpInst::BAD_ICMP_PREDICATE; + } +} + +// Given a floating point binary operator, return the matching +// integer version. +static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { + switch (Opcode) { + default: llvm_unreachable("Unhandled opcode!"); + case Instruction::FAdd: return Instruction::Add; + case Instruction::FSub: return Instruction::Sub; + case Instruction::FMul: return Instruction::Mul; + } +} + +// Find the roots - instructions that convert from the FP domain to +// integer domain. +void Float2Int::findRoots(Function &F, SmallPtrSet<Instruction*,8> &Roots) { + for (auto &I : inst_range(F)) { + switch (I.getOpcode()) { + default: break; + case Instruction::FPToUI: + case Instruction::FPToSI: + Roots.insert(&I); + break; + case Instruction::FCmp: + if (mapFCmpPred(cast<CmpInst>(&I)->getPredicate()) != + CmpInst::BAD_ICMP_PREDICATE) + Roots.insert(&I); + break; + } + } +} + +// Helper - mark I as having been traversed, having range R. +ConstantRange Float2Int::seen(Instruction *I, ConstantRange R) { + DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n"); + if (SeenInsts.find(I) != SeenInsts.end()) + SeenInsts.find(I)->second = R; + else + SeenInsts.insert(std::make_pair(I, R)); + return R; +} + +// Helper - get a range representing a poison value. 
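// [Editor's note, not part of the patch] In the helpers below,
// ConstantRange(N, true) is the full set at bitwidth N and
// ConstantRange(N, false) the empty set. At width MaxIntegerBW + 1, "full"
// therefore doubles as the poison/give-up marker, while "empty" means "no
// information yet" and is the identity for the unions taken later in
// validateAndTransform.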
+ConstantRange Float2Int::badRange() {
+  return ConstantRange(MaxIntegerBW + 1, true);
+}
+ConstantRange Float2Int::unknownRange() {
+  return ConstantRange(MaxIntegerBW + 1, false);
+}
+ConstantRange Float2Int::validateRange(ConstantRange R) {
+  if (R.getBitWidth() > MaxIntegerBW + 1)
+    return badRange();
+  return R;
+}
+
+// The most obvious way to structure the search is a depth-first, eager
+// search from each root. However, that requires direct recursion and so
+// can only handle small instruction sequences. Instead, we split the search
+// up into two phases:
+//   - walkBackwards: A breadth-first walk of the use-def graph starting from
+//       the roots. Populate "SeenInsts" with interesting
+//       instructions and poison values if they're obvious and
+//       cheap to compute. Calculate the equivalence set structure
+//       while we're here too.
+//   - walkForwards: Iterate over SeenInsts in reverse order, so we visit
+//       defs before their uses. Calculate the real range info.
+
+// Breadth-first walk of the use-def graph; determine the set of nodes
+// we care about and eagerly determine if some of them are poisonous.
+void Float2Int::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
+  std::deque<Instruction*> Worklist(Roots.begin(), Roots.end());
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.back();
+    Worklist.pop_back();
+
+    if (SeenInsts.find(I) != SeenInsts.end())
+      // Seen already.
+      continue;
+
+    switch (I->getOpcode()) {
+    // FIXME: Handle select and phi nodes.
+    default:
+      // Path terminated uncleanly.
+      seen(I, badRange());
+      break;
+
+    case Instruction::UIToFP: {
+      // Path terminated cleanly.
+      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
+      APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
+      seen(I, validateRange(ConstantRange(Min, Max)));
+      continue;
+    }
+
+    case Instruction::SIToFP: {
+      // Path terminated cleanly.
+      unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+      APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
+      APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
+      seen(I, validateRange(ConstantRange(SMin, SMax)));
+      continue;
+    }
+
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FCmp:
+      seen(I, unknownRange());
+      break;
+    }
+
+    for (Value *O : I->operands()) {
+      if (Instruction *OI = dyn_cast<Instruction>(O)) {
+        // Unify def-use chains if they interfere.
+        ECs.unionSets(I, OI);
+        if (SeenInsts.find(I)->second != badRange())
+          Worklist.push_back(OI);
+      } else if (!isa<ConstantFP>(O)) {
+        // Not an instruction or ConstantFP? We can't do anything.
+        seen(I, badRange());
+      }
+    }
+  }
+}
+
+// Walk forwards down the list of seen instructions, so we visit defs before
+// uses.
+void Float2Int::walkForwards() {
+  for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) {
+    if (It->second != unknownRange())
+      continue;
+
+    Instruction *I = It->first;
+    std::function<ConstantRange(ArrayRef<ConstantRange>)> Op;
+    switch (I->getOpcode()) {
+    // FIXME: Handle select and phi nodes.
+ default: + case Instruction::UIToFP: + case Instruction::SIToFP: + llvm_unreachable("Should have been handled in walkForwards!"); + + case Instruction::FAdd: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FAdd is a binary operator!"); + return Ops[0].add(Ops[1]); + }; + break; + + case Instruction::FSub: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FSub is a binary operator!"); + return Ops[0].sub(Ops[1]); + }; + break; + + case Instruction::FMul: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FMul is a binary operator!"); + return Ops[0].multiply(Ops[1]); + }; + break; + + // + // Root-only instructions - we'll only see these if they're the + // first node in a walk. + // + case Instruction::FPToUI: + case Instruction::FPToSI: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!"); + return Ops[0]; + }; + break; + + case Instruction::FCmp: + Op = [](ArrayRef<ConstantRange> Ops) { + assert(Ops.size() == 2 && "FCmp is a binary operator!"); + return Ops[0].unionWith(Ops[1]); + }; + break; + } + + bool Abort = false; + SmallVector<ConstantRange,4> OpRanges; + for (Value *O : I->operands()) { + if (Instruction *OI = dyn_cast<Instruction>(O)) { + assert(SeenInsts.find(OI) != SeenInsts.end() && + "def not seen before use!"); + OpRanges.push_back(SeenInsts.find(OI)->second); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(O)) { + // Work out if the floating point number can be losslessly represented + // as an integer. + // APFloat::convertToInteger(&Exact) purports to do what we want, but + // the exactness can be too precise. For example, negative zero can + // never be exactly converted to an integer. + // + // Instead, we ask APFloat to round itself to an integral value - this + // preserves sign-of-zero - then compare the result with the original. + // + APFloat F = CF->getValueAPF(); + + // First, weed out obviously incorrect values. Non-finite numbers + // can't be represented and neither can negative zero, unless + // we're in fast math mode. + if (!F.isFinite() || + (F.isZero() && F.isNegative() && isa<FPMathOperator>(I) && + !I->hasNoSignedZeros())) { + seen(I, badRange()); + Abort = true; + break; + } + + APFloat NewF = F; + auto Res = NewF.roundToIntegral(APFloat::rmNearestTiesToEven); + if (Res != APFloat::opOK || NewF.compare(F) != APFloat::cmpEqual) { + seen(I, badRange()); + Abort = true; + break; + } + // OK, it's representable. Now get it. + APSInt Int(MaxIntegerBW+1, false); + bool Exact; + CF->getValueAPF().convertToInteger(Int, + APFloat::rmNearestTiesToEven, + &Exact); + OpRanges.push_back(ConstantRange(Int)); + } else { + llvm_unreachable("Should have already marked this as badRange!"); + } + } + + // Reduce the operands' ranges to a single range and return. + if (!Abort) + seen(I, Op(OpRanges)); + } +} + +// If there is a valid transform to be done, do it. +bool Float2Int::validateAndTransform() { + bool MadeChange = false; + + // Iterate over every disjoint partition of the def-use graph. + for (auto It = ECs.begin(), E = ECs.end(); It != E; ++It) { + ConstantRange R(MaxIntegerBW + 1, false); + bool Fail = false; + Type *ConvertedToTy = nullptr; + + // For every member of the partition, union all the ranges together. 
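// [Editor's sketch, not part of the patch] The ConstantFP handling above,
// distilled into a standalone predicate (needs llvm/ADT/APFloat.h): round to
// an integral value and compare, instead of trusting convertToInteger's
// Exact flag. Note that this deliberately accepts -0.0; the sign-of-zero
// policy is enforced by the separate fast-math check above.
//
//   static bool roundTripsExactly(const APFloat &F) {
//     if (!F.isFinite())
//       return false;
//     APFloat Rounded = F;
//     APFloat::opStatus Res =
//         Rounded.roundToIntegral(APFloat::rmNearestTiesToEven);
//     return Res == APFloat::opOK && Rounded.compare(F) == APFloat::cmpEqual;
//   }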
+ for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); + MI != ME; ++MI) { + Instruction *I = *MI; + auto SeenI = SeenInsts.find(I); + if (SeenI == SeenInsts.end()) + continue; + + R = R.unionWith(SeenI->second); + // We need to ensure I has no users that have not been seen. + // If it does, transformation would be illegal. + // + // Don't count the roots, as they terminate the graphs. + if (Roots.count(I) == 0) { + // Set the type of the conversion while we're here. + if (!ConvertedToTy) + ConvertedToTy = I->getType(); + for (User *U : I->users()) { + Instruction *UI = dyn_cast<Instruction>(U); + if (!UI || SeenInsts.find(UI) == SeenInsts.end()) { + DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n"); + Fail = true; + break; + } + } + } + if (Fail) + break; + } + + // If the set was empty, or we failed, or the range is poisonous, + // bail out. + if (ECs.member_begin(It) == ECs.member_end() || Fail || + R.isFullSet() || R.isSignWrappedSet()) + continue; + assert(ConvertedToTy && "Must have set the convertedtoty by this point!"); + + // The number of bits required is the maximum of the upper and + // lower limits, plus one so it can be signed. + unsigned MinBW = std::max(R.getLower().getMinSignedBits(), + R.getUpper().getMinSignedBits()) + 1; + DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n"); + + // If we've run off the realms of the exactly representable integers, + // the floating point result will differ from an integer approximation. + + // Do we need more bits than are in the mantissa of the type we converted + // to? semanticsPrecision returns the number of mantissa bits plus one + // for the sign bit. + unsigned MaxRepresentableBits + = APFloat::semanticsPrecision(ConvertedToTy->getFltSemantics()) - 1; + if (MinBW > MaxRepresentableBits) { + DEBUG(dbgs() << "F2I: Value not guaranteed to be representable!\n"); + continue; + } + if (MinBW > 64) { + DEBUG(dbgs() << "F2I: Value requires more than 64 bits to represent!\n"); + continue; + } + + // OK, R is known to be representable. Now pick a type for it. + // FIXME: Pick the smallest legal type that will fit. + Type *Ty = (MinBW > 32) ? Type::getInt64Ty(*Ctx) : Type::getInt32Ty(*Ctx); + + for (auto MI = ECs.member_begin(It), ME = ECs.member_end(); + MI != ME; ++MI) + convert(*MI, Ty); + MadeChange = true; + } + + return MadeChange; +} + +Value *Float2Int::convert(Instruction *I, Type *ToTy) { + if (ConvertedInsts.find(I) != ConvertedInsts.end()) + // Already converted this instruction. + return ConvertedInsts[I]; + + SmallVector<Value*,4> NewOperands; + for (Value *V : I->operands()) { + // Don't recurse if we're an instruction that terminates the path. + if (I->getOpcode() == Instruction::UIToFP || + I->getOpcode() == Instruction::SIToFP) { + NewOperands.push_back(V); + } else if (Instruction *VI = dyn_cast<Instruction>(V)) { + NewOperands.push_back(convert(VI, ToTy)); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false); + bool Exact; + CF->getValueAPF().convertToInteger(Val, + APFloat::rmNearestTiesToEven, + &Exact); + NewOperands.push_back(ConstantInt::get(ToTy, Val)); + } else { + llvm_unreachable("Unhandled operand type?"); + } + } + + // Now create a new instruction. 
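// [Editor's note, not part of the patch] A concrete instance of the
// MaxRepresentableBits guard above: IEEE single precision carries 24
// significand bits, so 16777217 (2^24 + 1) is the first positive integer a
// 'float' cannot hold. If a partition's range needs more bits than the
// mantissa provides, the FP computation was already rounding, and exact
// integer arithmetic would no longer match it, so the demotion is skipped.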
+ IRBuilder<> IRB(I); + Value *NewV = nullptr; + switch (I->getOpcode()) { + default: llvm_unreachable("Unhandled instruction!"); + + case Instruction::FPToUI: + NewV = IRB.CreateZExtOrTrunc(NewOperands[0], I->getType()); + break; + + case Instruction::FPToSI: + NewV = IRB.CreateSExtOrTrunc(NewOperands[0], I->getType()); + break; + + case Instruction::FCmp: { + CmpInst::Predicate P = mapFCmpPred(cast<CmpInst>(I)->getPredicate()); + assert(P != CmpInst::BAD_ICMP_PREDICATE && "Unhandled predicate!"); + NewV = IRB.CreateICmp(P, NewOperands[0], NewOperands[1], I->getName()); + break; + } + + case Instruction::UIToFP: + NewV = IRB.CreateZExtOrTrunc(NewOperands[0], ToTy); + break; + + case Instruction::SIToFP: + NewV = IRB.CreateSExtOrTrunc(NewOperands[0], ToTy); + break; + + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + NewV = IRB.CreateBinOp(mapBinOpcode(I->getOpcode()), + NewOperands[0], NewOperands[1], + I->getName()); + break; + } + + // If we're a root instruction, RAUW. + if (Roots.count(I)) + I->replaceAllUsesWith(NewV); + + ConvertedInsts[I] = NewV; + return NewV; +} + +// Perform dead code elimination on the instructions we just modified. +void Float2Int::cleanup() { + for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend(); + I != E; ++I) + I->first->eraseFromParent(); +} + +bool Float2Int::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DEBUG(dbgs() << "F2I: Looking at function " << F.getName() << "\n"); + // Clear out all state. + ECs = EquivalenceClasses<Instruction*>(); + SeenInsts.clear(); + ConvertedInsts.clear(); + Roots.clear(); + + Ctx = &F.getParent()->getContext(); + + findRoots(F, Roots); + + walkBackwards(Roots); + walkForwards(); + + bool Modified = validateAndTransform(); + if (Modified) + cleanup(); + return Modified; +} + +FunctionPass *llvm::createFloat2IntPass() { + return new Float2Int(); +} + diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c73e60f..97d5b6d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1102,7 +1102,8 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); if (ConstantFoldLoadFromConstPtr(Src, DL)) return Offset; @@ -1263,7 +1264,8 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Type::getInt8PtrTy(Src->getContext(), AS)); Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); - Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); + Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src, + OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS)); return ConstantFoldLoadFromConstPtr(Src, DL); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 51e8041..ab8e5b8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1274,55 +1274,6 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, // LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. 
//===----------------------------------------------------------------------===// -/// Check for expressions that ScalarEvolution generates to compute -/// BackedgeTakenInfo. If these expressions have not been reduced, then -/// expanding them may incur additional cost (albeit in the loop preheader). -static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, - SmallPtrSetImpl<const SCEV*> &Processed, - ScalarEvolution *SE) { - if (!Processed.insert(S).second) - return false; - - // If the backedge-taken count is a UDiv, it's very likely a UDiv that - // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a - // precise expression, rather than a UDiv from the user's code. If we can't - // find a UDiv in the code with some simple searching, assume the former and - // forego rewriting the loop. - if (isa<SCEVUDivExpr>(S)) { - ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return true; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != S) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != S) - return true; - } - } - - // Recurse past add expressions, which commonly occur in the - // BackedgeTakenCount. They may already exist in program code, and if not, - // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) { - if (isHighCostExpansion(*I, BI, Processed, SE)) - return true; - } - return false; - } - - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S)) - return true; - - // If we haven't recognized an expensive SCEV pattern, assume it's an - // expression produced by program code. - return false; -} - /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction /// sequence that can be used by LinearFunctionTestReplace. @@ -1336,7 +1287,8 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, /// used by ABI constrained operation, as opposed to inttoptr/ptrtoint). /// However, we don't yet have a strong motivation for converting loop tests /// into inequality tests. -static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, + SCEVExpander &Rewriter) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || BackedgeTakenCount->isZero()) @@ -1346,12 +1298,10 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { return false; // Can't rewrite non-branch yet. 
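[Editor's note] The large deletion above is a relocation rather than a removal: this rebase moves the expansion-cost heuristic into SCEVExpander, and the updated canExpandBackedgeTakenCount (continued below) queries the expander directly. A sketch of the new call shape, assuming the isHighCostExpansion signature the hunk itself uses; the wrapper is illustrative:

    #include "llvm/Analysis/ScalarEvolutionExpander.h"
    using namespace llvm;

    // The UDiv/add/max special cases formerly hand-rolled in IndVarSimplify
    // now sit behind a single query on the expander.
    static bool cheapToExpand(const SCEV *BackedgeTakenCount, Loop *L,
                              SCEVExpander &Rewriter) {
      return !Rewriter.isHighCostExpansion(BackedgeTakenCount, L);
    }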
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); - if (!BI) + if (!isa<BranchInst>(L->getExitingBlock()->getTerminator())) return false; - SmallPtrSet<const SCEV*, 8> Processed; - if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) + if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L)) return false; return true; @@ -1637,7 +1587,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, && "unit stride pointer IV must be i8*"); IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); - return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); + return Builder.CreateGEP(nullptr, GEPBase, GEPOffset, "lftr.limit"); } else { // In any other case, convert both IVInit and IVCount to integers before @@ -1691,7 +1641,7 @@ LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition"); // Initialize CmpIndVar and IVCount to their preincremented values. Value *CmpIndVar = IndVar; @@ -1936,7 +1886,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { + if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 1f33f72..c19cd19 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -41,9 +41,9 @@ struct PointerOffsetPair { }; struct LoadPOPPair { + LoadPOPPair() = default; LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) : Load(L), POP(P), InsertOrder(O) {} - LoadPOPPair() {} LoadInst *Load; PointerOffsetPair POP; /// \brief The new load needs to be created before the first load in IR order. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 8445d5f..099f227 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -28,7 +28,7 @@ // // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however // it's useful to think about these as the same register, with some uses using -// the value of the register before the add and some using // it after. In this +// the value of the register before the add and some using it after. In this // example, the icmp is a post-increment user, since it uses %i.next, which is // the value of the induction variable after the increment. The other common // case of post-increment users is users outside the loop. @@ -112,8 +112,6 @@ public: /// a particular register. 
SmallBitVector UsedByIndices; - RegSortData() {} - void print(raw_ostream &OS) const; void dump() const; }; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 600cbde..3de3b3d 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -165,6 +165,7 @@ namespace { UP.MaxCount = UINT_MAX; UP.Partial = CurrentAllowPartial; UP.Runtime = CurrentRuntime; + UP.AllowExpensiveTripCount = false; TTI.getUnrollingPreferences(L, UP); } @@ -886,8 +887,8 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, - &LPM, &AC)) + if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, + TripMultiple, LI, this, &LPM, &AC)) return false; return true; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2b5a078..d651473 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1045,7 +1045,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { RepeatInstruction = processMemCpy(M); else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) RepeatInstruction = processMemMove(M); - else if (CallSite CS = (Value*)I) { + else if (auto CS = CallSite(I)) { for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) if (CS.isByValArgument(i)) MadeChange |= processByValArgument(CS, i); diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp new file mode 100644 index 0000000..fea7641 --- /dev/null +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -0,0 +1,252 @@ +//===- NaryReassociate.cpp - Reassociate n-ary expressions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates n-ary add expressions and eliminates the redundancy +// exposed by the reassociation. +// +// A motivating example: +// +// void foo(int a, int b) { +// bar(a + b); +// bar((a + 2) + b); +// } +// +// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify +// the above code to +// +// int t = a + b; +// bar(t); +// bar(t + 2); +// +// However, the Reassociate pass is unable to do that because it processes each +// instruction individually and believes (a + 2) + b is the best form according +// to its rank system. +// +// To address this limitation, NaryReassociate reassociates an expression in a +// form that reuses existing instructions. As a result, NaryReassociate can +// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that +// (a + b) is computed before. +// +// NaryReassociate works as follows. For every instruction in the form of (a + +// b) + c, it checks whether a + c or b + c is already computed by a dominating +// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + +// c) + a and removes the redundancy accordingly. To efficiently look up whether +// an expression is computed before, we store each instruction seen and its SCEV +// into an SCEV-to-instruction map. +// +// Although the algorithm pattern-matches only ternary additions, it +// automatically handles many >3-ary expressions by walking through the function +// in the depth-first order. 
For example, given +// +// (a + c) + d +// ((a + b) + c) + d +// +// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites +// ((a + c) + b) + d into ((a + c) + d) + b. +// +// Finally, the above dominator-based algorithm may need to be run for multiple +// iterations before emitting optimal code. One source of this need is that we +// only split an operand when it is used only once. The above algorithm can +// eliminate an instruction and decrease the usage count of its operands. As a +// result, an instruction that previously had multiple uses may become a +// single-use instruction and thus eligible for split consideration. For +// example, +// +// ac = a + c +// ab = a + b +// abc = ab + c +// ab2 = ab + b +// ab2c = ab2 + c +// +// In the first iteration, we cannot reassociate abc to ac+b because ab is used +// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a +// result, ab2 becomes dead and ab will be used only once in the second +// iteration. +// +// Limitations and TODO items: +// +// 1) We only consider n-ary adds for now. This should be extended and +// generalized. +// +// 2) Besides arithmetic operations, similar reassociation can be applied to +// GEPs. For example, if +// X = &arr[a] +// dominates +// Y = &arr[a + b] +// we may rewrite Y into X + b. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "nary-reassociate" + +namespace { +class NaryReassociate : public FunctionPass { +public: + static char ID; + + NaryReassociate(): FunctionPass(ID) { + initializeNaryReassociatePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<TargetLibraryInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ScalarEvolution>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.setPreservesCFG(); + } + +private: + // Runs only one iteration of the dominator-based algorithm. See the header + // comments for why we need multiple iterations. + bool doOneIteration(Function &F); + // Reassociates I to a better form. + Instruction *tryReassociateAdd(Instruction *I); + // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed. + Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I); + // Rewrites I to LHS + RHS if LHS is computed already. + Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I); + + DominatorTree *DT; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + // A lookup table quickly telling which instructions compute the given SCEV. + // Note that there can be multiple instructions at different locations + // computing to the same SCEV, so we map a SCEV to an instruction list.
For + // example, + // + // if (p1) + // foo(a + b); + // if (p2) + // bar(a + b); + DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs; +}; +} // anonymous namespace + +char NaryReassociate::ID = 0; +INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation", + false, false) + +FunctionPass *llvm::createNaryReassociatePass() { + return new NaryReassociate(); +} + +bool NaryReassociate::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + SE = &getAnalysis<ScalarEvolution>(); + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + + bool Changed = false, ChangedInThisIteration; + do { + ChangedInThisIteration = doOneIteration(F); + Changed |= ChangedInThisIteration; + } while (ChangedInThisIteration); + return Changed; +} + +bool NaryReassociate::doOneIteration(Function &F) { + bool Changed = false; + SeenExprs.clear(); + // Traverse the dominator tree in the depth-first order. This order makes sure + // all bases of a candidate are in Candidates when we process it. + for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT); + Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) { + BasicBlock *BB = Node->getBlock(); + for (auto I = BB->begin(); I != BB->end(); ++I) { + if (I->getOpcode() == Instruction::Add) { + if (Instruction *NewI = tryReassociateAdd(I)) { + Changed = true; + SE->forgetValue(I); + I->replaceAllUsesWith(NewI); + RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + I = NewI; + } + // We should add the rewritten instruction because tryReassociateAdd may + // have invalidated the original one. + SeenExprs[SE->getSCEV(I)].push_back(I); + } + } + } + return Changed; +} + +Instruction *NaryReassociate::tryReassociateAdd(Instruction *I) { + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + if (auto *NewI = tryReassociateAdd(LHS, RHS, I)) + return NewI; + if (auto *NewI = tryReassociateAdd(RHS, LHS, I)) + return NewI; + return nullptr; +} + +Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS, + Instruction *I) { + Value *A = nullptr, *B = nullptr; + // To be conservative, we reassociate I only when it is the only user of A+B. + if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) { + // I = (A + B) + RHS + // = (A + RHS) + B or (B + RHS) + A + const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B); + const SCEV *RHSExpr = SE->getSCEV(RHS); + if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I)) + return NewI; + if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I)) + return NewI; + } + return nullptr; +} + +Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr, + Value *RHS, Instruction *I) { + auto Pos = SeenExprs.find(LHSExpr); + // Bail out if LHSExpr is not previously seen. + if (Pos == SeenExprs.end()) + return nullptr; + + auto &LHSCandidates = Pos->second; + // Look for the closest dominator LHS of I that computes LHSExpr, and replace + // I with LHS + RHS. + // + // Because we traverse the dominator tree in the pre-order, a + // candidate that doesn't dominate the current instruction won't dominate any + // future instruction either. Therefore, we pop it out of the stack. 
This + // optimization makes the algorithm O(n). + while (!LHSCandidates.empty()) { + Instruction *LHS = LHSCandidates.back(); + if (DT->dominates(LHS, I)) { + Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I); + NewI->takeName(I); + return NewI; + } + LHSCandidates.pop_back(); + } + return nullptr; +} diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 944725a..536f2a6 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -217,7 +217,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header, BasicBlock *Current = Pred; while (true) { for (Instruction &I : *Current) { - if (CallSite CS = &I) + if (auto CS = CallSite(&I)) // Note: Technically, needing a safepoint isn't quite the right // condition here. We should instead be checking if the target method // has an @@ -424,8 +424,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F, // We need to stop going forward as soon as we see a call that can // grow the stack (i.e. the call target has a non-zero frame // size). - if (CallSite CS = cursor) { - (void)CS; // Silence an unused variable warning by gcc 4.8.2 + if (CallSite(cursor)) { if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(cursor)) { // llvm.assume(...) are not really calls. if (II->getIntrinsicID() == Intrinsic::assume) { diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f5d21ff..ba48e0a 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden, // Print the liveset found at the insert location static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden, cl::init(false)); -static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", - cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, + cl::init(false)); // Print out the base pointers for debugging -static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", - cl::Hidden, cl::init(false)); +static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, + cl::init(false)); + +#ifdef XDEBUG +static bool ClobberNonLive = true; +#else +static bool ClobberNonLive = false; +#endif +static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", + cl::location(ClobberNonLive), + cl::Hidden); namespace { struct RewriteStatepointsForGC : public FunctionPass { @@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) namespace { +struct GCPtrLivenessData { + /// Values defined in this block. + DenseMap<BasicBlock *, DenseSet<Value *>> KillSet; + /// Values used in this block (and thus live); does not include values + /// killed within this block. + DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet; + + /// Values live into this basic block (i.e. used by any + /// instruction in this basic block or ones reachable from here) + DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn; + + /// Values live out of this basic block (i.e.
live into + /// any successor block) + DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut; +}; + // The type of the internal cache used inside the findBasePointers family // of functions. From the callers perspective, this is an opaque type and // should not be inspected. @@ -105,10 +131,6 @@ struct PartiallyConstructedSafepointRecord { /// Mapping from live pointers to a base-defining-value DenseMap<llvm::Value *, llvm::Value *> PointerToBase; - /// Any new values which were added to the IR during base pointer analysis - /// for this safepoint - DenseSet<llvm::Value *> NewInsertedDefs; - /// The *new* gc.statepoint instruction itself. This produces the token /// that normal path gc.relocates and the gc.result are tied to. Instruction *StatepointToken; @@ -119,6 +141,15 @@ struct PartiallyConstructedSafepointRecord { }; } +/// Compute the live-in set for every basic block in the function +static void computeLiveInValues(DominatorTree &DT, Function &F, + GCPtrLivenessData &Data); + +/// Given results from the dataflow liveness computation, find the set of live +/// Values at a particular instruction. +static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, + StatepointLiveSetTy &out); + // TODO: Once we can get to the GCStrategy, this becomes // Optional<bool> isGCManagedPointer(const Value *V) const override { @@ -131,129 +162,46 @@ static bool isGCPointerType(const Type *T) { return false; } -/// Return true if the Value is a gc reference type which is potentially used -/// after the instruction 'loc'. This is only used with the edge reachability -/// liveness code. Note: It is assumed the V dominates loc. -static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT, - LoopInfo *LI) { - if (!isGCPointerType(V.getType())) - return false; - - if (V.use_empty()) - return false; - - // Given assumption that V dominates loc, this may be live - return true; +// Return true if this type is one which a) is a gc pointer or contains a GC +// pointer and b) is of a type this code expects to encounter as a live value. +// (The insertion code will assert that a type which matches (a) and not (b) +// is not encountered.) +static bool isHandledGCPointerType(Type *T) { + // We fully support gc pointers + if (isGCPointerType(T)) + return true; + // We partially support vectors of gc pointers. The code will assert if it + // can't handle something. + if (auto VT = dyn_cast<VectorType>(T)) + if (isGCPointerType(VT->getElementType())) + return true; + return false; } #ifndef NDEBUG -static bool isAggWhichContainsGCPtrType(Type *Ty) { +/// Returns true if this type contains a gc pointer whether we know how to +/// handle that type or not. +static bool containsGCPtrType(Type *Ty) { + if (isGCPointerType(Ty)) + return true; if (VectorType *VT = dyn_cast<VectorType>(Ty)) return isGCPointerType(VT->getScalarType()); if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) - return isGCPointerType(AT->getElementType()) || - isAggWhichContainsGCPtrType(AT->getElementType()); + return containsGCPtrType(AT->getElementType()); if (StructType *ST = dyn_cast<StructType>(Ty)) - return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), - [](Type *SubType) { - return isGCPointerType(SubType) || - isAggWhichContainsGCPtrType(SubType); - }); + return std::any_of( + ST->subtypes().begin(), ST->subtypes().end(), + [](Type *SubType) { return containsGCPtrType(SubType); }); return false; } -#endif - -// Conservatively identifies any definitions which might be live at the -// given instruction. 
The analysis is performed immediately before the -// given instruction. Values defined by that instruction are not considered -// live. Values used by that instruction are considered live. -// -// preconditions: valid IR graph, term is either a terminator instruction or -// a call instruction, pred is the basic block of term, DT, LI are valid -// -// side effects: none, does not mutate IR -// -// postconditions: populates liveValues as discussed above -static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred, - DominatorTree &DT, LoopInfo *LI, - StatepointLiveSetTy &liveValues) { - liveValues.clear(); - - assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator()); - Function *F = pred->getParent(); - - auto is_live_gc_reference = - [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); }; - - // Are there any gc pointer arguments live over this point? This needs to be - // special cased since arguments aren't defined in basic blocks. - for (Argument &arg : F->args()) { - assert(!isAggWhichContainsGCPtrType(arg.getType()) && - "support for FCA unimplemented"); - - if (is_live_gc_reference(arg)) { - liveValues.insert(&arg); - } - } - - // Walk through all dominating blocks - the ones which can contain - // definitions used in this block - and check to see if any of the values - // they define are used in locations potentially reachable from the - // interesting instruction. - BasicBlock *BBI = pred; - while (true) { - if (TraceLSP) { - errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n"; - } - assert(DT.dominates(BBI, pred)); - assert(isPotentiallyReachable(BBI, pred, &DT) && - "dominated block must be reachable"); - - // Walk through the instructions in dominating blocks and keep any - // that have a use potentially reachable from the block we're - // considering putting the safepoint in - for (Instruction &inst : *BBI) { - if (TraceLSP) { - errs() << "[LSP] Looking at instruction "; - inst.dump(); - } - - if (pred == BBI && (&inst) == term) { - if (TraceLSP) { - errs() << "[LSP] stopped because we encountered the safepoint " - "instruction.\n"; - } - - // If we're in the block which defines the interesting instruction, - // we don't want to include any values as live which are defined - // _after_ the interesting line or as part of the line itself - // i.e. "term" is the call instruction for a call safepoint, the - // results of the call should not be considered live in that stackmap - break; - } - - assert(!isAggWhichContainsGCPtrType(inst.getType()) && - "support for FCA unimplemented"); - - if (is_live_gc_reference(inst)) { - if (TraceLSP) { - errs() << "[LSP] found live value for this safepoint "; - inst.dump(); - term->dump(); - } - liveValues.insert(&inst); - } - } - if (!DT.getNode(BBI)->getIDom()) { - assert(BBI == &F->getEntryBlock() && - "failed to find a dominator for something other than " - "the entry block"); - break; - } - BBI = DT.getNode(BBI)->getIDom()->getBlock(); - } +// Returns true if this is a type which a) is a gc pointer or contains a GC +// pointer and b) is of a type which the code doesn't expect (i.e. first class +// aggregates). Used to trip assertions. +static bool isUnhandledGCPointerType(Type *Ty) { + return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); } +#endif static bool order_by_name(llvm::Value *a, llvm::Value *b) { if (a->hasName() && b->hasName()) { @@ -268,16 +216,17 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) { } } -/// Find the initial live set. 
Note that due to base pointer -/// insertion, the live set may be incomplete. -static void -analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, - PartiallyConstructedSafepointRecord &result) { +// Conservatively identifies any definitions which might be live at the +// given instruction. The analysis is performed immediately before the +// given instruction. Values defined by that instruction are not considered +// live. Values used by that instruction are considered live. +static void analyzeParsePointLiveness( + DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, + const CallSite &CS, PartiallyConstructedSafepointRecord &result) { Instruction *inst = CS.getInstruction(); - BasicBlock *BB = inst->getParent(); StatepointLiveSetTy liveset; - findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset); + findLiveSetAtInst(inst, OriginalLivenessData, liveset); if (PrintLiveSet) { // Note: This output is used by several of the test cases @@ -299,10 +248,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS, result.liveset = liveset; } -/// True iff this value is the null pointer constant (of any pointer type) -static bool LLVM_ATTRIBUTE_UNUSED isNullConstant(Value *V) { - return isa<Constant>(V) && isa<PointerType>(V->getType()) && - cast<Constant>(V)->isNullValue(); +/// If we can trivially determine that this vector contains only base pointers, +/// return the base instruction. +static Value *findBaseOfVector(Value *I) { + assert(I->getType()->isVectorTy() && + cast<VectorType>(I->getType())->getElementType()->isPointerTy() && + "Illegal to ask for the base pointer of a non-pointer type"); + + // Each case parallels findBaseDefiningValue below, see that code for + // detailed motivation. + + if (isa<Argument>(I)) + // An incoming argument to the function is a base pointer + return I; + + // We shouldn't see the address of a global as a vector value? + assert(!isa<GlobalVariable>(I) && + "unexpected global variable found in base of vector"); + + // inlining could possibly introduce phi node that contains + // undef if callee has multiple returns + if (isa<UndefValue>(I)) + // utterly meaningless, but useful for dealing with partially optimized + // code. + return I; + + // Due to inheritance, this must be _after_ the global variable and undef + // checks + if (Constant *Con = dyn_cast<Constant>(I)) { + assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && + "order of checks wrong!"); + assert(Con->isNullValue() && "null is the only case which makes sense"); + return Con; + } + + if (isa<LoadInst>(I)) + return I; + + // Note: This code is currently rather incomplete. We are essentially only + // handling cases where the vector element is trivially a base pointer. We + // need to update the entire base pointer construction algorithm to know how + // to track vector elements and potentially scalarize, but the case which + // would motivate the work hasn't shown up in real workloads yet. + llvm_unreachable("no base found for vector element"); } /// Helper function for findBasePointer - Will return a value which either a) @@ -312,52 +300,36 @@ static Value *findBaseDefiningValue(Value *I) { assert(I->getType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); - // There are instructions which can never return gc pointer values. Sanity - // check - // that this is actually true. 
- assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && - !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers"); - assert((!isa<Instruction>(I) || isa<InvokeInst>(I) || - !cast<Instruction>(I)->isTerminator()) && - "With the exception of invoke terminators don't define values"); - assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) && - "Can't be definitions to start with"); - assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) && - "Comparisons don't give ops"); - // There's a bunch of instructions which just don't make sense to apply to - // a pointer. The only valid reason for this would be pointer bit - // twiddling which we're just not going to support. - assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) && - "Binary ops on pointer values are meaningless. Unless your " - "bit-twiddling which we don't support"); - - if (Argument *Arg = dyn_cast<Argument>(I)) { + // This case is a bit of a hack - it only handles extracts from vectors which + // trivially contain only base pointers. See note inside the function for + // how to improve this. + if (auto *EEI = dyn_cast<ExtractElementInst>(I)) { + Value *VectorOperand = EEI->getVectorOperand(); + Value *VectorBase = findBaseOfVector(VectorOperand); + (void)VectorBase; + assert(VectorBase && "extract element not known to be a trivial base"); + return EEI; + } + + if (isa<Argument>(I)) // An incoming argument to the function is a base pointer // We should have never reached here if this argument isn't an gc value - assert(Arg->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return Arg; - } + return I; - if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) { + if (isa<GlobalVariable>(I)) // base case - assert(global->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return global; - } + return I; // inlining could possibly introduce phi node that contains // undef if callee has multiple returns - if (UndefValue *undef = dyn_cast<UndefValue>(I)) { - assert(undef->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return undef; // utterly meaningless, but useful for dealing with - // partially optimized code. - } + if (isa<UndefValue>(I)) + // utterly meaningless, but useful for dealing with + // partially optimized code. + return I; // Due to inheritance, this must be _after_ the global variable and undef // checks - if (Constant *con = dyn_cast<Constant>(I)) { + if (Constant *Con = dyn_cast<Constant>(I)) { assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && "order of checks wrong!"); // Note: Finding a constant base for something marked for relocation @@ -368,49 +340,30 @@ static Value *findBaseDefiningValue(Value *I) { // off a potentially null value and have proven it null. We also use // null pointers in dead paths of relocation phis (which we might later // want to find a base pointer for). - assert(con->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - assert(con->isNullValue() && "null is the only case which makes sense"); - return con; + assert(isa<ConstantPointerNull>(Con) && + "null is the only case which makes sense"); + return Con; } if (CastInst *CI = dyn_cast<CastInst>(I)) { - Value *def = CI->stripPointerCasts(); - assert(def->getType()->isPointerTy() && - "Base for pointer must be another pointer"); + Value *Def = CI->stripPointerCasts(); // If we find a cast instruction here, it means we've found a cast which is // not simply a pointer cast (i.e. an inttoptr). 
We don't know how to // handle int->ptr conversion. - assert(!isa<CastInst>(def) && "shouldn't find another cast here"); - return findBaseDefiningValue(def); + assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); + return findBaseDefiningValue(Def); } - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (LI->getType()->isPointerTy()) { - Value *Op = LI->getOperand(0); - (void)Op; - // Has to be a pointer to an gc object, or possibly an array of such? - assert(Op->getType()->isPointerTy()); - return LI; // The value loaded is an gc base itself - } - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - Value *Op = GEP->getOperand(0); - if (Op->getType()->isPointerTy()) { - return findBaseDefiningValue(Op); // The base of this GEP is the base - } - } + if (isa<LoadInst>(I)) + return I; // The value loaded is an gc base itself - if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) { - // An alloca represents a conceptual stack slot. It's the slot itself - // that the GC needs to know about, not the value in the slot. - assert(alloc->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return alloc; - } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + // The base of this GEP is the base + return findBaseDefiningValue(GEP->getPointerOperand()); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { + case Intrinsic::experimental_gc_result_ptr: default: // fall through to general call handling break; @@ -418,11 +371,6 @@ static Value *findBaseDefiningValue(Value *I) { case Intrinsic::experimental_gc_result_float: case Intrinsic::experimental_gc_result_int: llvm_unreachable("these don't produce pointers"); - case Intrinsic::experimental_gc_result_ptr: - // This is just a special case of the CallInst check below to handle a - // statepoint with deopt args which hasn't been rewritten for GC yet. - // TODO: Assert that the statepoint isn't rewritten yet. - return II; case Intrinsic::experimental_gc_relocate: { // Rerunning safepoint insertion after safepoints are already // inserted is not supported. It could probably be made to work, @@ -440,41 +388,27 @@ static Value *findBaseDefiningValue(Value *I) { // We assume that functions in the source language only return base // pointers. This should probably be generalized via attributes to support // both source language and internal functions. - if (CallInst *call = dyn_cast<CallInst>(I)) { - assert(call->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return call; - } - if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) { - assert(invoke->getType()->isPointerTy() && - "Base for pointer must be another pointer"); - return invoke; - } + if (isa<CallInst>(I) || isa<InvokeInst>(I)) + return I; // I have absolutely no idea how to implement this part yet. It's not // neccessarily hard, I just haven't really looked at it yet. assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); - if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) { + if (isa<AtomicCmpXchgInst>(I)) // A CAS is effectively a atomic store and load combined under a // predicate. From the perspective of base pointers, we just treat it - // like a load. We loaded a pointer from a address in memory, that value - // had better be a valid base pointer. 
- return cas->getPointerOperand(); - } - if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) { - assert(AtomicRMWInst::Xchg == atomic->getOperation() && - "All others are binary ops which don't apply to base pointers"); - // semantically, a load, store pair. Treat it the same as a standard load - return atomic->getPointerOperand(); - } + // like a load. + return I; + + assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are " + "binary ops which don't apply to pointers"); // The aggregate ops. Aggregates can either be in the heap or on the // stack, but in either case, this is simply a field load. As a result, // this is a defining definition of the base just like a load is. - if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) { - return ev; - } + if (isa<ExtractValueInst>(I)) + return I; // We should never see an insert vector since that would require we be // tracing back a struct value not a pointer value. @@ -485,23 +419,21 @@ static Value *findBaseDefiningValue(Value *I) { // return a value which dynamically selects from amoung several base // derived pointers (each with it's own base potentially). It's the job of // the caller to resolve these. - if (SelectInst *select = dyn_cast<SelectInst>(I)) { - return select; - } - - return cast<PHINode>(I); + assert((isa<SelectInst>(I) || isa<PHINode>(I)) && + "missing instruction case in findBaseDefiningValing"); + return I; } /// Returns the base defining value for this value. -static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { - Value *&Cached = cache[I]; +static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { + Value *&Cached = Cache[I]; if (!Cached) { Cached = findBaseDefiningValue(I); } - assert(cache[I] != nullptr); + assert(Cache[I] != nullptr); if (TraceLSP) { - errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() + dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() << "\n"; } return Cached; @@ -509,25 +441,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) { /// Return a base pointer for this value if known. Otherwise, return it's /// base defining value. -static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) { - Value *def = findBaseDefiningValueCached(I, cache); - auto Found = cache.find(def); - if (Found != cache.end()) { +static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { + Value *Def = findBaseDefiningValueCached(I, Cache); + auto Found = Cache.find(Def); + if (Found != Cache.end()) { // Either a base-of relation, or a self reference. Caller must check. return Found->second; } // Only a BDV available - return def; + return Def; } /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. -static bool isKnownBaseResult(Value *v) { - if (!isa<PHINode>(v) && !isa<SelectInst>(v)) { +static bool isKnownBaseResult(Value *V) { + if (!isa<PHINode>(V) && !isa<SelectInst>(V)) { // no recursion possible return true; } - if (cast<Instruction>(v)->getMetadata("is_base_value")) { + if (isa<Instruction>(V) && + cast<Instruction>(V)->getMetadata("is_base_value")) { // This is a previously inserted base phi or select. We know // that this is a base value. return true; @@ -647,8 +580,7 @@ private: /// from. For gc objects, this is simply itself. On success, returns a value /// which is the base pointer. (This is reliable and can be used for /// relocation.) 
On failure, returns nullptr. -static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, - DenseSet<llvm::Value *> &NewInsertedDefs) { +static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { Value *def = findBaseOrBDV(I, cache); if (isKnownBaseResult(def)) { @@ -687,7 +619,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, done = true; // Since we're adding elements to 'states' as we run, we can't keep // iterators into the set. - SmallVector<Value*, 16> Keys; + SmallVector<Value *, 16> Keys; Keys.reserve(states.size()); for (auto Pair : states) { Value *V = Pair.first; @@ -777,7 +709,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // We want to keep naming deterministic in the loop that follows, so // sort the keys before iteration. This is useful in allowing us to // write stable tests. Note that there is no invalidation issue here. - SmallVector<Value*, 16> Keys; + SmallVector<Value *, 16> Keys; Keys.reserve(states.size()); for (auto Pair : states) { Value *V = Pair.first; @@ -792,13 +724,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); if (!state.isConflict()) continue; - + if (isa<PHINode>(v)) { int num_preds = std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); assert(num_preds > 0 && "how did we reach here"); PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - NewInsertedDefs.insert(phi); // Add metadata marking this as a base value auto *const_1 = ConstantInt::get( Type::getInt32Ty( @@ -815,7 +746,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, UndefValue *undef = UndefValue::get(sel->getType()); SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, undef, "base_select", sel); - NewInsertedDefs.insert(basesel); // Add metadata marking this as a base value auto *const_1 = ConstantInt::get( Type::getInt32Ty( @@ -838,7 +768,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); if (!state.isConflict()) continue; - + if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { PHINode *phi = cast<PHINode>(v); unsigned NumPHIValues = phi->getNumIncomingValues(); @@ -866,8 +796,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, assert(states.count(base)); base = states[base].getBase(); assert(base != nullptr && "unknown PhiState!"); - assert(NewInsertedDefs.count(base) && - "should have already added this in a prev. iteration!"); } // In essense this assert states: the only way two @@ -898,7 +826,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, if (base->getType() != basephi->getType()) { base = new BitCastInst(base, basephi->getType(), "cast", InBB->getTerminator()); - NewInsertedDefs.insert(base); } basephi->addIncoming(base, InBB); } @@ -924,7 +851,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // The cast is needed since base traversal may strip away bitcasts if (base->getType() != basesel->getType()) { base = new BitCastInst(base, basesel->getType(), "cast", basesel); - NewInsertedDefs.insert(base); } basesel->setOperand(i, base); } @@ -979,18 +905,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache, // post condition: PointerToBase contains one (derived, base) pair for every // pointer in live. 
Note that derived can be equal to base if the original // pointer was a base pointer. -static void findBasePointers(const StatepointLiveSetTy &live, - DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, - DominatorTree *DT, DefiningValueMapTy &DVCache, - DenseSet<llvm::Value *> &NewInsertedDefs) { +static void +findBasePointers(const StatepointLiveSetTy &live, + DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, + DominatorTree *DT, DefiningValueMapTy &DVCache) { // For the naming of values inserted to be deterministic - which makes for // much cleaner and more stable tests - we need to assign an order to the // live values. DenseSets do not provide a deterministic order across runs. - SmallVector<Value*, 64> Temp; + SmallVector<Value *, 64> Temp; Temp.insert(Temp.end(), live.begin(), live.end()); std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *ptr : Temp) { - Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs); + Value *base = findBasePointer(ptr, DVCache); assert(base && "failed to find base pointer"); PointerToBase[ptr] = base; assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || @@ -1001,10 +927,10 @@ static void findBasePointers(const StatepointLiveSetTy &live, // If you see this trip and like to live really dangerously, the code should // be correct, just with idioms the verifier can't handle. You can try // disabling the verifier at your own substaintial risk. - assert(!isNullConstant(base) && "the relocation code needs adjustment to " - "handle the relocation of a null pointer " - "constant without causing false positives " - "in the safepoint ir verifier."); + assert(!isa<ConstantPointerNull>(base) && + "the relocation code needs adjustment to handle the relocation of " + "a null pointer constant without causing false positives in the " + "safepoint ir verifier."); } } @@ -1014,14 +940,13 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, const CallSite &CS, PartiallyConstructedSafepointRecord &result) { DenseMap<llvm::Value *, llvm::Value *> PointerToBase; - DenseSet<llvm::Value *> NewInsertedDefs; - findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs); + findBasePointers(result.liveset, PointerToBase, &DT, DVCache); if (PrintBasePointers) { // Note: Need to print these in a stable order since this is checked in // some tests. 
errs() << "Base Pairs (w/o Relocation):\n"; - SmallVector<Value*, 64> Temp; + SmallVector<Value *, 64> Temp; Temp.reserve(PointerToBase.size()); for (auto Pair : PointerToBase) { Temp.push_back(Pair.first); @@ -1029,90 +954,59 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *Ptr : Temp) { Value *Base = PointerToBase[Ptr]; - errs() << " derived %" << Ptr->getName() << " base %" - << Base->getName() << "\n"; + errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() + << "\n"; } } result.PointerToBase = PointerToBase; - result.NewInsertedDefs = NewInsertedDefs; } -/// Check for liveness of items in the insert defs and add them to the live -/// and base pointer sets -static void fixupLiveness(DominatorTree &DT, const CallSite &CS, - const DenseSet<Value *> &allInsertedDefs, - PartiallyConstructedSafepointRecord &result) { - Instruction *inst = CS.getInstruction(); - - auto liveset = result.liveset; - auto PointerToBase = result.PointerToBase; +/// Given an updated version of the dataflow liveness results, update the +/// liveset and base pointer maps for the call site CS. +static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, + const CallSite &CS, + PartiallyConstructedSafepointRecord &result); - auto is_live_gc_reference = - [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); }; - - // For each new definition, check to see if a) the definition dominates the - // instruction we're interested in, and b) one of the uses of that definition - // is edge-reachable from the instruction we're interested in. This is the - // same definition of liveness we used in the intial liveness analysis - for (Value *newDef : allInsertedDefs) { - if (liveset.count(newDef)) { - // already live, no action needed - continue; - } - - // PERF: Use DT to check instruction domination might not be good for - // compilation time, and we could change to optimal solution if this - // turn to be a issue - if (!DT.dominates(cast<Instruction>(newDef), inst)) { - // can't possibly be live at inst - continue; - } - - if (is_live_gc_reference(*newDef)) { - // Add the live new defs into liveset and PointerToBase - liveset.insert(newDef); - PointerToBase[newDef] = newDef; - } - } - - result.liveset = liveset; - result.PointerToBase = PointerToBase; -} - -static void fixupLiveReferences( - Function &F, DominatorTree &DT, Pass *P, - const DenseSet<llvm::Value *> &allInsertedDefs, - ArrayRef<CallSite> toUpdate, +static void recomputeLiveInValues( + Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + // TODO-PERF: reuse the original liveness, then simply run the dataflow + // again. The old values are still live and will help it stablize quickly. + GCPtrLivenessData RevisedLivenessData; + computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; const CallSite &CS = toUpdate[i]; - fixupLiveness(DT, CS, allInsertedDefs, info); + recomputeLiveInValues(RevisedLivenessData, CS, info); } } -// Normalize basic block to make it ready to be target of invoke statepoint. -// It means spliting it to have single predecessor. Return newly created BB -// ready to be successor of invoke statepoint. 
-static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB, - BasicBlock *InvokeParent, - Pass *P) { - BasicBlock *ret = BB; - +// When inserting gc.relocate calls, we need to ensure there are no uses +// of the original value between the gc.statepoint and the gc.relocate call. +// One case which can arise is a phi node at the start of one of the successor blocks. +// We also need to be able to insert the gc.relocates only on the path which +// goes through the statepoint. We might need to split an edge to make this +// possible. +static BasicBlock * +normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { + DominatorTree *DT = nullptr; + if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) + DT = &DTP->getDomTree(); + + BasicBlock *Ret = BB; if (!BB->getUniquePredecessor()) { - ret = SplitBlockPredecessors(BB, InvokeParent, ""); + Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); } - // Another requirement for such basic blocks is to not have any phi nodes. - // Since we just ensured that new BB will have single predecessor, - // all phi nodes in it will have one value. Here it would be naturall place - // to - // remove them all. But we can not do this because we are risking to remove - // one of the values stored in liveset of another statepoint. We will do it - // later after placing all safepoints. + // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes + // from it + FoldSingleEntryPHINodes(Ret); + assert(!isa<PHINode>(Ret->begin())); - return ret; + // At this point, we can safely insert a gc.relocate as the first instruction + // in Ret if needed. + return Ret; } static int find_index(ArrayRef<Value *> livevec, Value *val) { @@ -1180,7 +1074,7 @@ static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables, // combination. This results is some blow up the function declarations in // the IR, but removes the need for argument bitcasts which shrinks the IR // greatly and makes it much more readable. - SmallVector<Type *, 1> types; // one per 'any' type + SmallVector<Type *, 1> types; // one per 'any' type types.push_back(liveVariables[i]->getType()); // result type Value *gc_relocate_decl = Intrinsic::getDeclaration( M, Intrinsic::experimental_gc_relocate, types); @@ -1298,8 +1192,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ token = invoke; // Generate gc relocates in exceptional path - BasicBlock *unwindBlock = normalizeBBForInvokeSafepoint( - toReplace->getUnwindDest(), invoke->getParent(), P); + BasicBlock *unwindBlock = toReplace->getUnwindDest(); + assert(!isa<PHINode>(unwindBlock->begin()) && + unwindBlock->getUniquePredecessor() && + "can't safely insert in this block!"); Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); Builder.SetInsertPoint(IP); @@ -1319,8 +1215,10 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ exceptional_token, Builder); // Generate gc relocates and returns for normal block - BasicBlock *normalDest = normalizeBBForInvokeSafepoint( - toReplace->getNormalDest(), invoke->getParent(), P); + BasicBlock *normalDest = toReplace->getNormalDest(); + assert(!isa<PHINode>(normalDest->begin()) && + normalDest->getUniquePredecessor() && + "can't safely insert in this block!"); IP = &*(normalDest->getFirstInsertionPt()); Builder.SetInsertPoint(IP); @@ -1333,7 +1231,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ // Take the name of the original value call if it had one.
token->takeName(CS.getInstruction()); - // The GCResult is already inserted, we just need to find it +// The GCResult is already inserted, we just need to find it #ifndef NDEBUG Instruction *toReplace = CS.getInstruction(); assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) && @@ -1351,7 +1249,6 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ // Second, create a gc.relocate for every live variable CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); - } namespace { @@ -1383,7 +1280,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec, // Replace an existing gc.statepoint with a new one and a set of gc.relocates // which make the relocations happening at this safepoint explicit. -// +// // WARNING: Does not do any fixup to adjust users of the original live // values. That's the callers responsibility. static void @@ -1458,14 +1355,13 @@ static void relocationViaAlloca( Function &F, DominatorTree &DT, ArrayRef<Value *> live, ArrayRef<struct PartiallyConstructedSafepointRecord> records) { #ifndef NDEBUG - int initialAllocaNum = 0; - - // record initial number of allocas - for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; - itr++) { - if (isa<AllocaInst>(*itr)) - initialAllocaNum++; - } + // record initial number of (static) allocas; we'll check we have the same + // number when we get done. + int InitialAllocaNum = 0; + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; + I++) + if (isa<AllocaInst>(*I)) + InitialAllocaNum++; #endif // TODO-PERF: change data structures, reserve @@ -1505,47 +1401,49 @@ static void relocationViaAlloca( // In case if it was invoke statepoint // we will insert stores for exceptional path gc relocates. if (isa<InvokeInst>(Statepoint)) { - insertRelocationStores(info.UnwindToken->users(), - allocaMap, visitedLiveValues); + insertRelocationStores(info.UnwindToken->users(), allocaMap, + visitedLiveValues); } -#ifndef NDEBUG - // As a debuging aid, pretend that an unrelocated pointer becomes null at - // the gc.statepoint. This will turn some subtle GC problems into slightly - // easier to debug SEGVs - SmallVector<AllocaInst *, 64> ToClobber; - for (auto Pair : allocaMap) { - Value *Def = Pair.first; - AllocaInst *Alloca = cast<AllocaInst>(Pair.second); - - // This value was relocated - if (visitedLiveValues.count(Def)) { - continue; + if (ClobberNonLive) { + // As a debugging aid, pretend that an unrelocated pointer becomes null at + // the gc.statepoint. This will turn some subtle GC problems into + // slightly easier to debug SEGVs. Note that on large IR files with + // lots of gc.statepoints this is extremely costly in both memory and + // time.
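The clobbering described just above is a fail-fast idea: poison every slot that liveness did not mark live at the statepoint, so a stale use crashes right away instead of corrupting the heap much later. The same idea in a self-contained sketch, with hypothetical Frame/Obj names that are not part of the pass:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <unordered_set>

struct Obj { int Data; };

// Hypothetical frame: every GC-visible slot lives here.
using Frame = std::unordered_map<std::string, Obj *>;

// Null out every slot that liveness did not report as live; any later
// dereference of a stale slot now fails immediately and obviously.
void clobberNonLive(Frame &F, const std::unordered_set<std::string> &Live) {
  for (auto &Slot : F)
    if (!Live.count(Slot.first))
      Slot.second = nullptr;
}

int main() {
  Obj A{1}, B{2};
  Frame F{{"a", &A}, {"b", &B}};
  clobberNonLive(F, {"a"});
  std::printf("a=%d, b %s\n", F["a"]->Data,
              F["b"] ? "survived (bug in liveness?)" : "was poisoned");
  return 0;
}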
+ SmallVector<AllocaInst *, 64> ToClobber; + for (auto Pair : allocaMap) { + Value *Def = Pair.first; + AllocaInst *Alloca = cast<AllocaInst>(Pair.second); + + // This value was relocated + if (visitedLiveValues.count(Def)) { + continue; + } + ToClobber.push_back(Alloca); } - ToClobber.push_back(Alloca); - } - auto InsertClobbersAt = [&](Instruction *IP) { - for (auto *AI : ToClobber) { - auto AIType = cast<PointerType>(AI->getType()); - auto PT = cast<PointerType>(AIType->getElementType()); - Constant *CPN = ConstantPointerNull::get(PT); - StoreInst *store = new StoreInst(CPN, AI); - store->insertBefore(IP); - } - }; + auto InsertClobbersAt = [&](Instruction *IP) { + for (auto *AI : ToClobber) { + auto AIType = cast<PointerType>(AI->getType()); + auto PT = cast<PointerType>(AIType->getElementType()); + Constant *CPN = ConstantPointerNull::get(PT); + StoreInst *store = new StoreInst(CPN, AI); + store->insertBefore(IP); + } + }; - // Insert the clobbering stores. These may get intermixed with the - // gc.results and gc.relocates, but that's fine. - if (auto II = dyn_cast<InvokeInst>(Statepoint)) { - InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); - InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); - } else { - BasicBlock::iterator Next(cast<CallInst>(Statepoint)); - Next++; - InsertClobbersAt(Next); + // Insert the clobbering stores. These may get intermixed with the + // gc.results and gc.relocates, but that's fine. + if (auto II = dyn_cast<InvokeInst>(Statepoint)) { + InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); + InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); + } else { + BasicBlock::iterator Next(cast<CallInst>(Statepoint)); + Next++; + InsertClobbersAt(Next); + } } -#endif } // update use with load allocas and add store for gc_relocated for (auto Pair : allocaMap) { @@ -1603,11 +1501,11 @@ static void relocationViaAlloca( assert(!inst->isTerminator() && "The only TerminatorInst that can produce a value is " "InvokeInst which is handled above."); - store->insertAfter(inst); + store->insertAfter(inst); } } else { assert((isa<Argument>(def) || isa<GlobalVariable>(def) || - (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) && + isa<ConstantPointerNull>(def)) && "Must be argument or global"); store->insertAfter(cast<Instruction>(alloca)); } @@ -1621,12 +1519,11 @@ static void relocationViaAlloca( } #ifndef NDEBUG - for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end; - itr++) { - if (isa<AllocaInst>(*itr)) - initialAllocaNum--; - } - assert(initialAllocaNum == 0 && "We must not introduce any extra allocas"); + for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; + I++) + if (isa<AllocaInst>(*I)) + InitialAllocaNum--; + assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); #endif } @@ -1647,76 +1544,155 @@ template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { } } -static Function *getUseHolder(Module &M) { - FunctionType *ftype = - FunctionType::get(Type::getVoidTy(M.getContext()), true); - Function *Func = cast<Function>(M.getOrInsertFunction("__tmp_use", ftype)); - return Func; -} - /// Insert holders so that each Value is obviously live through the entire -/// liftetime of the call. +/// lifetime of the call. 
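insertUseHolderAfter, immediately below, keeps values alive by handing them to a dummy vararg call (__tmp_use) that is stripped out later. Outside LLVM, the same keep-alive effect is often approximated with an empty inline-asm that claims to read its operand; a minimal sketch under that assumption (GCC/Clang-style asm, hypothetical holdLive name):

// A no-op "holder": the empty asm claims to read V, so the optimizer must
// keep every argument's value alive up to this point.
template <typename T> inline void holdLive(T &&V) {
  asm volatile("" : : "g"(V) : "memory");
}
template <typename T, typename... Ts>
inline void holdLive(T &&V, Ts &&... Rest) {
  holdLive(static_cast<T &&>(V));
  holdLive(static_cast<Ts &&>(Rest)...);
}

int main() {
  int A = 1, B = 2;
  int *P = &A;
  // Analogous in spirit to the dummy holder call: everything listed here is
  // observably used, so it cannot be dead-code-eliminated beforehand.
  holdLive(A, B, P);
  return 0;
}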
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values, - SmallVectorImpl<CallInst *> &holders) { + SmallVectorImpl<CallInst *> &Holders) { + if (Values.empty()) + // No values to hold live, might as well not insert the empty holder + return; + Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); - Function *Func = getUseHolder(*M); + // Use a dummy vararg function to actually hold the values live + Function *Func = cast<Function>(M->getOrInsertFunction( + "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true))); if (CS.isCall()) { // For call safepoints insert dummy calls right after safepoint - BasicBlock::iterator next(CS.getInstruction()); - next++; - CallInst *base_holder = CallInst::Create(Func, Values, "", next); - holders.push_back(base_holder); - } else if (CS.isInvoke()) { - // For invoke safepooints insert dummy calls both in normal and - // exceptional destination blocks - InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); - CallInst *normal_holder = CallInst::Create( - Func, Values, "", invoke->getNormalDest()->getFirstInsertionPt()); - CallInst *unwind_holder = CallInst::Create( - Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt()); - holders.push_back(normal_holder); - holders.push_back(unwind_holder); - } else - llvm_unreachable("unsupported call type"); + BasicBlock::iterator Next(CS.getInstruction()); + Next++; + Holders.push_back(CallInst::Create(Func, Values, "", Next)); + return; + } + // For invoke safepoints insert dummy calls both in normal and + // exceptional destination blocks + auto *II = cast<InvokeInst>(CS.getInstruction()); + Holders.push_back(CallInst::Create( + Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); + Holders.push_back(CallInst::Create( + Func, Values, "", II->getUnwindDest()->getFirstInsertionPt())); } static void findLiveReferences( Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { + GCPtrLivenessData OriginalLivenessData; + computeLiveInValues(DT, F, OriginalLivenessData); for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; const CallSite &CS = toUpdate[i]; - analyzeParsePointLiveness(DT, CS, info); + analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info); } } -static void addBasesAsLiveValues(StatepointLiveSetTy &liveset, - DenseMap<Value *, Value *> &PointerToBase) { - // Identify any base pointers which are used in this safepoint, but not - // themselves relocated. We need to relocate them so that later inserted - // safepoints can get the properly relocated base register. - DenseSet<Value *> missing; - for (Value *L : liveset) { - assert(PointerToBase.find(L) != PointerToBase.end()); - Value *base = PointerToBase[L]; - assert(base); - if (liveset.find(base) == liveset.end()) { - assert(PointerToBase.find(base) == PointerToBase.end()); - // uniqued by set insert - missing.insert(base); +/// Remove any vector of pointers from the liveset by scalarizing them over the +/// statepoint instruction. Adds the scalarized pieces to the liveset. It +/// would be preferable to include the vector in the statepoint itself, but +/// the lowering code currently does not handle that. Extending it would be +/// slightly non-trivial since it requires a format change. Given how rare +/// such cases are (for the moment?), scalarizing is an acceptable compromise.
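splitVectorValues, defined next, exists because statepoint lowering can relocate scalar pointers but not whole vectors of them. A conceptual sketch of why explode-relocate-reform is sound, with a hypothetical RelocMap standing in for the collector's object motion (none of these names come from the pass):

#include <cassert>
#include <unordered_map>
#include <vector>

struct Obj { int Id; };
using RelocMap = std::unordered_map<Obj *, Obj *>;

// The relocation machinery only knows how to update scalar pointers, which
// is exactly the constraint that motivates scalarization in the pass.
Obj *relocateScalar(Obj *P, const RelocMap &M) {
  auto It = M.find(P);
  return It == M.end() ? P : It->second;
}

std::vector<Obj *> safepoint(const std::vector<Obj *> &Vec, const RelocMap &M) {
  // Split the vector into scalars (extractelement in the pass) ...
  std::vector<Obj *> Elements(Vec.begin(), Vec.end());
  // ... relocate each scalar across the statepoint ...
  for (Obj *&E : Elements)
    E = relocateScalar(E, M);
  // ... and reform the vector afterwards (insertelement in the pass).
  return Elements;
}

int main() {
  Obj Old{7}, Moved{7};
  std::vector<Obj *> V{&Old};
  RelocMap M{{&Old, &Moved}};
  V = safepoint(V, M);
  assert(V[0] == &Moved && "vector element tracked through relocation");
  return 0;
}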
+static void splitVectorValues(Instruction *StatepointInst, + StatepointLiveSetTy &LiveSet, DominatorTree &DT) { + SmallVector<Value *, 16> ToSplit; + for (Value *V : LiveSet) + if (isa<VectorType>(V->getType())) + ToSplit.push_back(V); + + if (ToSplit.empty()) + return; + + Function &F = *(StatepointInst->getParent()->getParent()); + + DenseMap<Value *, AllocaInst *> AllocaMap; + // First is normal return, second is exceptional return (invoke only) + DenseMap<Value *, std::pair<Value *, Value *>> Replacements; + for (Value *V : ToSplit) { + LiveSet.erase(V); + + AllocaInst *Alloca = + new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI()); + AllocaMap[V] = Alloca; + + VectorType *VT = cast<VectorType>(V->getType()); + IRBuilder<> Builder(StatepointInst); + SmallVector<Value *, 16> Elements; + for (unsigned i = 0; i < VT->getNumElements(); i++) + Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); + LiveSet.insert(Elements.begin(), Elements.end()); + + auto InsertVectorReform = [&](Instruction *IP) { + Builder.SetInsertPoint(IP); + Builder.SetCurrentDebugLocation(IP->getDebugLoc()); + Value *ResultVec = UndefValue::get(VT); + for (unsigned i = 0; i < VT->getNumElements(); i++) + ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], + Builder.getInt32(i)); + return ResultVec; + }; + + if (isa<CallInst>(StatepointInst)) { + BasicBlock::iterator Next(StatepointInst); + Next++; + Instruction *IP = &*(Next); + Replacements[V].first = InsertVectorReform(IP); + Replacements[V].second = nullptr; + } else { + InvokeInst *Invoke = cast<InvokeInst>(StatepointInst); + // We've already normalized - check that we don't have shared destination + // blocks + BasicBlock *NormalDest = Invoke->getNormalDest(); + assert(!isa<PHINode>(NormalDest->begin())); + BasicBlock *UnwindDest = Invoke->getUnwindDest(); + assert(!isa<PHINode>(UnwindDest->begin())); + // Insert insert element sequences in both successors + Instruction *IP = &*(NormalDest->getFirstInsertionPt()); + Replacements[V].first = InsertVectorReform(IP); + IP = &*(UnwindDest->getFirstInsertionPt()); + Replacements[V].second = InsertVectorReform(IP); } } + for (Value *V : ToSplit) { + AllocaInst *Alloca = AllocaMap[V]; + + // Capture all users before we start mutating use lists + SmallVector<Instruction *, 16> Users; + for (User *U : V->users()) + Users.push_back(cast<Instruction>(U)); + + for (Instruction *I : Users) { + if (auto Phi = dyn_cast<PHINode>(I)) { + for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) + if (V == Phi->getIncomingValue(i)) { + LoadInst *Load = new LoadInst( + Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); + Phi->setIncomingValue(i, Load); + } + } else { + LoadInst *Load = new LoadInst(Alloca, "", I); + I->replaceUsesOfWith(V, Load); + } + } - // Note that we want these at the end of the list, otherwise - // register placement gets screwed up once we lower to STATEPOINT - // instructions. This is an utter hack, but there doesn't seem to be a - // better one. 
- for (Value *base : missing) { - assert(base); - liveset.insert(base); - PointerToBase[base] = base; - } - assert(liveset.size() == PointerToBase.size()); + // Store the original value and the replacement value into the alloca + StoreInst *Store = new StoreInst(V, Alloca); + if (auto I = dyn_cast<Instruction>(V)) + Store->insertAfter(I); + else + Store->insertAfter(Alloca); + + // Normal return for invoke, or call return + Instruction *Replacement = cast<Instruction>(Replacements[V].first); + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + // Unwind return for invoke only + Replacement = cast_or_null<Instruction>(Replacements[V].second); + if (Replacement) + (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); + } + + // apply mem2reg to promote alloca to SSA + SmallVector<AllocaInst *, 16> Allocas; + for (Value *V : ToSplit) + Allocas.push_back(AllocaMap[V]); + PromoteMemToReg(Allocas, DT); } static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, @@ -1734,6 +1710,20 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, } #endif + // When inserting gc.relocates for invokes, we need to be able to insert at + // the top of the successor blocks. See the comment on + // normalizeForInvokeSafepoint for exactly what is needed. Note that this step + // may restructure the CFG. + for (CallSite CS : toUpdate) { + if (!CS.isInvoke()) + continue; + InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); + normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), + P); + normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), + P); + } + // A list of dummy calls added to the IR to keep various values obviously // live in the IR. We'll remove all of these when done. SmallVector<CallInst *, 64> holders; @@ -1749,7 +1739,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, SmallVector<Value *, 64> DeoptValues; for (Use &U : StatepointCS.vm_state_args()) { Value *Arg = cast<Value>(&U); - if (isGCPointerType(Arg->getType())) + assert(!isUnhandledGCPointerType(Arg->getType()) && + "support for FCA unimplemented"); + if (isHandledGCPointerType(Arg->getType())) DeoptValues.push_back(Arg); } insertUseHolderAfter(CS, DeoptValues, holders); @@ -1767,6 +1759,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // site. findLiveReferences(F, DT, P, toUpdate, records); + // Do a limited scalarization of any live-at-safepoint vector values which + // contain pointers. This enables this pass to run after vectorization at + // the cost of some possible performance loss. TODO: it would be nice to + // natively support vectors all the way through the backend so we don't need + // to scalarize here.
+ for (size_t i = 0; i < records.size(); i++) { + struct PartiallyConstructedSafepointRecord &info = records[i]; + Instruction *statepoint = toUpdate[i].getInstruction(); + splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT); + } + // B) Find the base pointers for each live pointer /* scope for caching */ { // Cache the 'defining value' relation used in the computation and @@ -1790,13 +1793,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // gep a + 1 // safepoint 2 // br loop - DenseSet<llvm::Value *> allInsertedDefs; - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - allInsertedDefs.insert(info.NewInsertedDefs.begin(), - info.NewInsertedDefs.end()); - } - // We insert some dummy calls after each safepoint to definitely hold live // the base pointers which were identified for that safepoint. We'll then // ask liveness for _every_ base inserted to see what is now live. Then we @@ -1813,22 +1809,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, insertUseHolderAfter(CS, Bases, holders); } - // Add the bases explicitly to the live vector set. This may result in a few - // extra relocations, but the base has to be available whenever a pointer - // derived from it is used. Thus, we need it to be part of the statepoint's - // gc arguments list. TODO: Introduce an explicit notion (in the following - // code) of the GC argument list as seperate from the live Values at a - // given statepoint. - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - addBasesAsLiveValues(info.liveset, info.PointerToBase); - } + // By selecting base pointers, we've effectively inserted new uses. Thus, we + // need to rerun liveness. We may *also* have inserted new defs, but that's + // not the key issue. + recomputeLiveInValues(F, DT, P, toUpdate, records); - // If we inserted any new values, we need to adjust our notion of what is - // live at a particular safepoint. - if (!allInsertedDefs.empty()) { - fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records); - } if (PrintBasePointers) { for (size_t i = 0; i < records.size(); i++) { struct PartiallyConstructedSafepointRecord &info = records[i]; @@ -1858,25 +1843,6 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, } toUpdate.clear(); // prevent accident use of invalid CallSites - // In case if we inserted relocates in a different basic block than the - // original safepoint (this can happen for invokes). We need to be sure that - // original values were not used in any of the phi nodes at the - // beginning of basic block containing them. Because we know that all such - // blocks will have single predecessor we can safely assume that all phi - // nodes have single entry (because of normalizeBBForInvokeSafepoint). - // Just remove them all here. 
-  for (size_t i = 0; i < records.size(); i++) {
-    Instruction *I = records[i].StatepointToken;
-
-    if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
-      FoldSingleEntryPHINodes(invoke->getNormalDest());
-      assert(!isa<PHINode>(invoke->getNormalDest()->begin()));
-
-      FoldSingleEntryPHINodes(invoke->getUnwindDest());
-      assert(!isa<PHINode>(invoke->getUnwindDest()->begin()));
-    }
-  }
-
   // Do all the fixups of the original live variables to their relocated selves
   SmallVector<Value *, 128> live;
   for (size_t i = 0; i < records.size(); i++) {
@@ -1889,6 +1855,24 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
     Statepoint statepoint(info.StatepointToken);
     live.insert(live.end(), statepoint.gc_args_begin(),
                 statepoint.gc_args_end());
+#ifndef NDEBUG
+    // Do some basic sanity checks on our liveness results before performing
+    // relocation. Relocation can and will turn mistakes in liveness results
+    // into non-sensical code which is much harder to debug.
+    // TODO: It would be nice to test consistency as well
+    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+           "statepoint must be reachable or liveness is meaningless");
+    for (Value *V : statepoint.gc_args()) {
+      if (!isa<Instruction>(V))
+        // Non-instruction values trivially dominate all possible uses
+        continue;
+      auto LiveInst = cast<Instruction>(V);
+      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
+             "unreachable values should never be live");
+      assert(DT.dominates(LiveInst, info.StatepointToken) &&
+             "basic SSA liveness expectation violated by liveness analysis");
+    }
+#endif
   }
   unique_unsorted(live);
@@ -1924,18 +1908,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
   if (!shouldRewriteStatepointsIn(F))
     return false;
 
-  // Gather all the statepoints which need rewritten.
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  // Gather all the statepoints which need to be rewritten. Be careful to only
+  // consider those in reachable code since we need to ask dominance queries
+  // when rewriting. We'll delete the unreachable ones in a moment.
   SmallVector<CallSite, 64> ParsePointNeeded;
+  bool HasUnreachableStatepoint = false;
   for (Instruction &I : inst_range(F)) {
     // TODO: only the ones with the flag set!
-    if (isStatepoint(I))
-      ParsePointNeeded.push_back(CallSite(&I));
+    if (isStatepoint(I)) {
+      if (DT.isReachableFromEntry(I.getParent()))
+        ParsePointNeeded.push_back(CallSite(&I));
+      else
+        HasUnreachableStatepoint = true;
+    }
   }
 
+  bool MadeChange = false;
+
+  // Delete any unreachable statepoints so that we don't have unrewritten
+  // statepoints surviving this pass. This makes testing easier and the
+  // resulting IR less confusing to human readers. Rather than be fancy, we
+  // just reuse a utility function which removes the unreachable blocks.
+  if (HasUnreachableStatepoint)
+    MadeChange |= removeUnreachableBlocks(F);
+
   // Return early if no work to do.
   if (ParsePointNeeded.empty())
-    return false;
+    return MadeChange;
+
+  // As a prepass, go ahead and aggressively destroy single-entry phi nodes.
+  // These are created by LCSSA. They have the effect of increasing the size
+  // of liveness sets for no good reason. It may be harder to do this
+  // post-insertion since relocations and base phis can confuse things; a
+  // small example follows.
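For illustration only (block and value names hypothetical), the kind of LCSSA phi this prepass folds away:

    // loop.exit:                                      ; preds = %loop (unique predecessor)
    //   %v.lcssa = phi i64 addrspace(1)* [ %v, %loop ]

FoldSingleEntryPHINodes replaces every use of %v.lcssa with %v and erases the phi, so the liveness computation later tracks one value instead of two.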
+  for (BasicBlock &BB : F)
+    if (BB.getUniquePredecessor()) {
+      MadeChange = true;
+      FoldSingleEntryPHINodes(&BB);
+    }
 
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return insertParsePoints(F, DT, this, ParsePointNeeded);
+  MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+  return MadeChange;
+}
+
+// Liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness; the set of potentially
+// interesting values should be small and easy to pre-compute.
+
+/// Is this value a constant consisting entirely of null values?
+static bool isConstantNull(Value *V) {
+  return isa<Constant>(V) && cast<Constant>(V)->isNullValue();
+}
+
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block.
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+                                BasicBlock::reverse_iterator rend,
+                                DenseSet<Value *> &LiveTmp) {
+
+  for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+    Instruction *I = &*ritr;
+
+    // KILL/Def - Remove this definition from LiveIn
+    LiveTmp.erase(I);
+
+    // Don't consider *uses* in PHI nodes; we handle their contribution to
+    // predecessor blocks when we seed the LiveOut sets
+    if (isa<PHINode>(I))
+      continue;
+
+    // USE - Add to the LiveIn set for this instruction
+    for (Value *V : I->operands()) {
+      assert(!isUnhandledGCPointerType(V->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+          !isa<UndefValue>(V)) {
+        // The choice to exclude null and undef is arbitrary here. Reconsider?
+        LiveTmp.insert(V);
+      }
+    }
+  }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+  for (BasicBlock *Succ : successors(BB)) {
+    const BasicBlock::iterator E(Succ->getFirstNonPHI());
+    for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+      PHINode *Phi = cast<PHINode>(&*I);
+      Value *V = Phi->getIncomingValueForBlock(BB);
+      assert(!isUnhandledGCPointerType(V->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+          !isa<UndefValue>(V)) {
+        // The choice to exclude null and undef is arbitrary here. Reconsider?
+        LiveTmp.insert(V);
+      }
+    }
+  }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+  DenseSet<Value *> KillSet;
+  for (Instruction &I : *BB)
+    if (isHandledGCPointerType(I.getType()))
+      KillSet.insert(&I);
+  return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+                          TerminatorInst *TI, bool TermOkay = false) {
+  for (Value *V : Live) {
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      // The terminator can be a member of the LiveOut set. LLVM's definition
+      // of instruction dominance states that V does not dominate itself. As
+      // such, we need to special case this to allow it.
+      if (TermOkay && TI == I)
+        continue;
+      assert(DT.dominates(I, TI) &&
+             "basic SSA liveness expectation violated by liveness analysis");
+    }
+  }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties. This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+                          BasicBlock &BB) {
+  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+                                GCPtrLivenessData &Data) {
+
+  SmallSetVector<BasicBlock *, 200> Worklist;
+  auto AddPredsToWorklist = [&](BasicBlock *BB) {
+    // We use a SetVector so that we don't have duplicates in the worklist.
+    Worklist.insert(pred_begin(BB), pred_end(BB));
+  };
+  auto NextItem = [&]() {
+    BasicBlock *BB = Worklist.back();
+    Worklist.pop_back();
+    return BB;
+  };
+
+  // Seed the liveness for each individual block
+  for (BasicBlock &BB : F) {
+    Data.KillSet[&BB] = computeKillSet(&BB);
+    Data.LiveSet[&BB].clear();
+    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+    for (Value *Kill : Data.KillSet[&BB])
+      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+    Data.LiveOut[&BB] = DenseSet<Value *>();
+    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+    Data.LiveIn[&BB] = Data.LiveSet[&BB];
+    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+    if (!Data.LiveIn[&BB].empty())
+      AddPredsToWorklist(&BB);
+  }
+
+  // Propagate that liveness until stable
+  while (!Worklist.empty()) {
+    BasicBlock *BB = NextItem();
+
+    // Compute our new liveout set, then exit early if it hasn't changed
+    // despite the contribution of our successor.
+    DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+    const auto OldLiveOutSize = LiveOut.size();
+    for (BasicBlock *Succ : successors(BB)) {
+      assert(Data.LiveIn.count(Succ));
+      set_union(LiveOut, Data.LiveIn[Succ]);
+    }
+    // assert: OldLiveOut is a subset of LiveOut
+    if (OldLiveOutSize == LiveOut.size()) {
+      // If the sets are the same size, then we didn't actually add anything
+      // when unioning our successors' LiveIn. Thus, the LiveIn of this block
+      // hasn't changed.
+      continue;
+    }
+    Data.LiveOut[BB] = LiveOut;
+
+    // Apply the effects of this basic block
+    DenseSet<Value *> LiveTmp = LiveOut;
+    set_union(LiveTmp, Data.LiveSet[BB]);
+    set_subtract(LiveTmp, Data.KillSet[BB]);
+
+    assert(Data.LiveIn.count(BB));
+    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+    // assert: OldLiveIn is a subset of LiveTmp
+    if (OldLiveIn.size() != LiveTmp.size()) {
+      Data.LiveIn[BB] = LiveTmp;
+      AddPredsToWorklist(BB);
+    }
+  } // while (!Worklist.empty())
+
+#ifndef NDEBUG
+  // Sanity check our output against SSA properties. This helps catch any
+  // missing kills during the above iteration.
+  for (BasicBlock &BB : F) {
+    checkBasicSSA(DT, Data, BB);
+  }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+                              StatepointLiveSetTy &Out) {
+
+  BasicBlock *BB = Inst->getParent();
+
+  // Note: The copy is intentional and required
+  assert(Data.LiveOut.count(BB));
+  DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+  // We want to handle the statepoint itself oddly. Its
+  // call result is not live (normal), nor are its arguments
+  // (unless they're used again later).
+  // This adjustment is
+  // specifically what we need to relocate
+  BasicBlock::reverse_iterator rend(Inst);
+  computeLiveInValues(BB->rbegin(), rend, LiveOut);
+  LiveOut.erase(Inst);
+  Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+                                  const CallSite &CS,
+                                  PartiallyConstructedSafepointRecord &Info) {
+  Instruction *Inst = CS.getInstruction();
+  StatepointLiveSetTy Updated;
+  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+  DenseSet<Value *> Bases;
+  for (auto KVPair : Info.PointerToBase) {
+    Bases.insert(KVPair.second);
+  }
+#endif
+  // We may have base pointers which are now live that weren't before. We need
+  // to update the PointerToBase structure to reflect this.
+  for (auto V : Updated)
+    if (!Info.PointerToBase.count(V)) {
+      assert(Bases.count(V) && "can't find base for unexpected live value");
+      Info.PointerToBase[V] = V;
+      continue;
+    }
+
+#ifndef NDEBUG
+  for (auto V : Updated) {
+    assert(Info.PointerToBase.count(V) &&
+           "must be able to find base for live value");
+  }
+#endif
+
+  // Remove any stale base mappings - this can happen since our liveness is
+  // more precise than the one inherent in the base pointer analysis
+  DenseSet<Value *> ToErase;
+  for (auto KVPair : Info.PointerToBase)
+    if (!Updated.count(KVPair.first))
+      ToErase.insert(KVPair.first);
+  for (auto V : ToErase)
+    Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+  for (auto KVPair : Info.PointerToBase)
+    assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+  Info.liveset = Updated;
+}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 875a007..bc068f7 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1012,7 +1012,8 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
   Constant *Ptr = Operands[0];
   auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
-  markConstant(&I, ConstantExpr::getGetElementPtr(Ptr, Indices));
+  markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
+                                                  Indices));
 }
 
 void SCCPSolver::visitStoreInst(StoreInst &SI) {
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 06b000f..59dc528 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1166,10 +1166,9 @@ public:
       } else {
         continue;
       }
-      Instruction *DbgVal =
-          DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
-                                      DIExpression(DVI->getExpression()), Inst);
-      DbgVal->setDebugLoc(DVI->getDebugLoc());
+      DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+                                  DIExpression(DVI->getExpression()),
+                                  DVI->getDebugLoc(), Inst);
     }
   }
 };
@@ -1552,7 +1551,8 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
   if (Indices.size() == 1 &&
       cast<ConstantInt>(Indices.back())->isZero())
     return BasePtr;
-  return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
+  return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
+                               NamePrefix + "sroa_idx");
 }
 
 /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1803,7 +1803,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
     OffsetPtr = Int8PtrOffset == 0 ?
Int8Ptr - : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), + : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, + IRB.getInt(Int8PtrOffset), NamePrefix + "sroa_raw_idx"); } Ptr = OffsetPtr; @@ -3250,7 +3251,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. - Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *GEP = + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); @@ -3283,7 +3285,7 @@ private: // Extract the single value and store it using the indices. Value *Store = IRB.CreateStore( IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), - IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep")); + IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep")); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); } @@ -4188,14 +4190,14 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { // Create a piece expression describing the new partition or reuse AI's // expression if there is only one partition. DIExpression PieceExpr = Expr; - if (IsSplit || Expr.isBitPiece()) { + if (IsSplit || Expr->isBitPiece()) { // If this alloca is already a scalar replacement of a larger aggregate, // Piece.Offset describes the offset inside the scalar. - uint64_t Offset = Expr.isBitPiece() ? Expr.getBitPieceOffset() : 0; + uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0; uint64_t Start = Offset + Piece.Offset; uint64_t Size = Piece.Size; - if (Expr.isBitPiece()) { - uint64_t AbsEnd = Expr.getBitPieceOffset() + Expr.getBitPieceSize(); + if (Expr->isBitPiece()) { + uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize(); if (Start >= AbsEnd) // No need to describe a SROAed padding. continue; @@ -4208,8 +4210,8 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca)) OldDDI->eraseFromParent(); - auto *NewDDI = DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, &AI); - NewDDI->setDebugLoc(DbgDecl->getDebugLoc()); + DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(), + &AI); } } return Changed; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 3e7cf04..f99fe3f 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -217,16 +217,16 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) { /// \returns The profiled weight of I. unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) { DebugLoc DLoc = Inst.getDebugLoc(); - if (DLoc.isUnknown()) + if (!DLoc) return 0; unsigned Lineno = DLoc.getLine(); if (Lineno < HeaderLineno) return 0; - DILocation DIL(DLoc.getAsMDNode(*Ctx)); + DILocation DIL = DLoc.get(); int LOffset = Lineno - HeaderLineno; - unsigned Discriminator = DIL.getDiscriminator(); + unsigned Discriminator = DIL->getDiscriminator(); unsigned Weight = Samples->samplesAt(LOffset, Discriminator); DEBUG(dbgs() << " " << Lineno << "." << Discriminator << ":" << Inst << " (line offset: " << LOffset << "." << Discriminator @@ -642,9 +642,8 @@ void SampleProfileLoader::propagateWeights(Function &F) { /// \returns the line number where \p F is defined. 
If it returns 0, /// it means that there is no debug information available for \p F. unsigned SampleProfileLoader::getFunctionLoc(Function &F) { - DISubprogram S = getDISubprogram(&F); - if (S.isSubprogram()) - return S.getLineNumber(); + if (MDSubprogram *S = getDISubprogram(&F)) + return S->getLine(); // If could not find the start of \p F, emit a diagnostic to inform the user // about the missed opportunity. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 6cc8411..42095ae 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -59,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); initializeMergedLoadStoreMotionPass(Registry); + initializeNaryReassociatePass(Registry); initializePartiallyInlineLibCallsPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); @@ -77,6 +78,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoadCombinePass(Registry); initializePlaceBackedgeSafepointsImplPass(Registry); initializePlaceSafepointsPass(Registry); + initializeFloat2IntPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index acd8585..693c5ae 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1117,10 +1117,9 @@ public: } else { continue; } - Instruction *DbgVal = DIB->insertDbgValueIntrinsic( - Arg, 0, DIVariable(DVI->getVariable()), - DIExpression(DVI->getExpression()), Inst); - DbgVal->setDebugLoc(DVI->getDebugLoc()); + DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), + DIExpression(DVI->getExpression()), + DVI->getDebugLoc(), Inst); } } }; @@ -2135,7 +2134,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, // split the alloca again later. unsigned AS = AI->getType()->getAddressSpace(); Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); - V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + V = Builder.CreateGEP(Builder.getInt8Ty(), V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index a457cba..d55dc6a 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -213,7 +213,7 @@ Value *Scatterer::operator[](unsigned I) { CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0"); } if (I != 0) - CV[I] = Builder.CreateConstGEP1_32(CV[0], I, + CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I, V->getName() + ".i" + Twine(I)); } else { // Search through a chain of InsertElementInsts looking for element I. diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 1a04d74..8af4753 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -757,14 +757,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( } } // Create an ugly GEP with a single index for each index. - ResultPtr = Builder.CreateGEP(ResultPtr, Idx, "uglygep"); + ResultPtr = + Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); } } // Create a GEP with the constant offset index. 
if (AccumulativeByteOffset != 0) { Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); - ResultPtr = Builder.CreateGEP(ResultPtr, Offset, "uglygep"); + ResultPtr = + Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); } if (ResultPtr->getType() != Variadic->getType()) ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index e71031c..2fc9368 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -15,42 +15,46 @@ // // There are many optimizations we can perform in the domain of SLSR. This file // for now contains only an initial step. Specifically, we look for strength -// reduction candidates in two forms: +// reduction candidates in the following forms: // -// Form 1: (B + i) * S -// Form 2: &B[i * S] +// Form 1: B + i * S +// Form 2: (B + i) * S +// Form 3: &B[i * S] // // where S is an integer variable, and i is a constant integer. If we found two -// candidates +// candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2 +// in a simpler way with respect to S1. For example, // -// S1: X = (B + i) * S -// S2: Y = (B + i') * S +// S1: X = B + i * S +// S2: Y = B + i' * S => X + (i' - i) * S // -// or +// S1: X = (B + i) * S +// S2: Y = (B + i') * S => X + (i' - i) * S // // S1: X = &B[i * S] -// S2: Y = &B[i' * S] -// -// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with +// S2: Y = &B[i' * S] => &X[(i' - i) * S] // -// Y = X + (i' - i) * S +// Note: (i' - i) * S is folded to the extent possible. // -// or +// This rewriting is in general a good idea. The code patterns we focus on +// usually come from loop unrolling, so (i' - i) * S is likely the same +// across iterations and can be reused. When that happens, the optimized form +// takes only one add starting from the second iteration. // -// Y = &X[(i' - i) * S] -// -// where (i' - i) * S is folded to the extent possible. When S2 has multiple -// bases, we pick the one that is closest to S2, or S2's "immediate" basis. +// When such rewriting is possible, we call S1 a "basis" of S2. When S2 has +// multiple bases, we choose to rewrite S2 with respect to its "immediate" +// basis, the basis that is the closest ancestor in the dominator tree. // // TODO: // -// - Handle candidates in the form of B + i * S -// // - Floating point arithmetics when fast math is enabled. // // - SLSR may decrease ILP at the architecture level. Targets that are very // sensitive to ILP may want to disable it. Having SLSR to consider ILP is // left as future work. +// +// - When (i' - i) is constant but i and i' are not, we could still perform +// SLSR. #include <vector> #include "llvm/ADT/DenseSet.h" @@ -72,13 +76,12 @@ namespace { class StraightLineStrengthReduce : public FunctionPass { public: - // SLSR candidate. Such a candidate must be in the form of - // (Base + Index) * Stride - // or - // Base[..][Index * Stride][..] + // SLSR candidate. Such a candidate must be in one of the forms described in + // the header comments. struct Candidate : public ilist_node<Candidate> { enum Kind { Invalid, // reserved for the default constructor + Add, // B + i * S Mul, // (B + i) * S GEP, // &B[..][i * S][..] }; @@ -92,14 +95,14 @@ public: Basis(nullptr) {} Kind CandidateKind; const SCEV *Base; - // Note that Index and Stride of a GEP candidate may not have the same - // integer type. 
In that case, during rewriting, Stride will be + // Note that Index and Stride of a GEP candidate do not necessarily have the + // same integer type. In that case, during rewriting, Stride will be // sign-extended or truncated to Index's type. ConstantInt *Index; Value *Stride; // The instruction this candidate corresponds to. It helps us to rewrite a // candidate with respect to its immediate basis. Note that one instruction - // can corresponds to multiple candidates depending on how you associate the + // can correspond to multiple candidates depending on how you associate the // expression. For instance, // // (a + 1) * (b + 2) @@ -143,31 +146,43 @@ private: // Returns true if Basis is a basis for C, i.e., Basis dominates C and they // share the same base and stride. bool isBasisFor(const Candidate &Basis, const Candidate &C); + // Returns whether the candidate can be folded into an addressing mode. + bool isFoldable(const Candidate &C, TargetTransformInfo *TTI, + const DataLayout *DL); + // Returns true if C is already in a simplest form and not worth being + // rewritten. + bool isSimplestForm(const Candidate &C); // Checks whether I is in a candidate form. If so, adds all the matching forms // to Candidates, and tries to find the immediate basis for each of them. - void allocateCandidateAndFindBasis(Instruction *I); + void allocateCandidatesAndFindBasis(Instruction *I); + // Allocate candidates and find bases for Add instructions. + void allocateCandidatesAndFindBasisForAdd(Instruction *I); + // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a + // candidate. + void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS, + Instruction *I); // Allocate candidates and find bases for Mul instructions. - void allocateCandidateAndFindBasisForMul(Instruction *I); + void allocateCandidatesAndFindBasisForMul(Instruction *I); // Splits LHS into Base + Index and, if succeeds, calls - // allocateCandidateAndFindBasis. - void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS, - Instruction *I); + // allocateCandidatesAndFindBasis. + void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS, + Instruction *I); // Allocate candidates and find bases for GetElementPtr instructions. - void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP); + void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP); // A helper function that scales Idx with ElementSize before invoking - // allocateCandidateAndFindBasis. - void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, - Value *S, uint64_t ElementSize, - Instruction *I); + // allocateCandidatesAndFindBasis. + void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, + Value *S, uint64_t ElementSize, + Instruction *I); // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate // basis. - void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B, - ConstantInt *Idx, Value *S, - Instruction *I); + void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B, + ConstantInt *Idx, Value *S, + Instruction *I); // Rewrites candidate C with respect to Basis. void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); // A helper function that factors ArrayIdx to a product of a stride and a - // constant index, and invokes allocateCandidateAndFindBasis with the + // constant index, and invokes allocateCandidatesAndFindBasis with the // factorings. 
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, GetElementPtrInst *GEP); @@ -187,7 +202,7 @@ private: // Temporarily holds all instructions that are unlinked (but not deleted) by // rewriteCandidateWithBasis. These instructions will be actually removed // after all rewriting finishes. - DenseSet<Instruction *> UnlinkedInstructions; + std::vector<Instruction *> UnlinkedInstructions; }; } // anonymous namespace @@ -215,9 +230,9 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, Basis.CandidateKind == C.CandidateKind); } -static bool isCompletelyFoldable(GetElementPtrInst *GEP, - const TargetTransformInfo *TTI, - const DataLayout *DL) { +static bool isGEPFoldable(GetElementPtrInst *GEP, + const TargetTransformInfo *TTI, + const DataLayout *DL) { GlobalVariable *BaseGV = nullptr; int64_t BaseOffset = 0; bool HasBaseReg = false; @@ -252,53 +267,143 @@ static bool isCompletelyFoldable(GetElementPtrInst *GEP, BaseOffset, HasBaseReg, Scale); } -// TODO: We currently implement an algorithm whose time complexity is linear to -// the number of existing candidates. However, a better algorithm exists. We -// could depth-first search the dominator tree, and maintain a hash table that -// contains all candidates that dominate the node being traversed. This hash -// table is indexed by the base and the stride of a candidate. Therefore, -// finding the immediate basis of a candidate boils down to one hash-table look -// up. -void StraightLineStrengthReduce::allocateCandidateAndFindBasis( - Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, - Instruction *I) { - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If &B[Idx * S] fits into an addressing mode, do not turn it into - // non-free computation. - if (isCompletelyFoldable(GEP, TTI, DL)) - return; +// Returns whether (Base + Index * Stride) can be folded to an addressing mode. +static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, + TargetTransformInfo *TTI) { + return TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true, + Index->getSExtValue()); +} + +bool StraightLineStrengthReduce::isFoldable(const Candidate &C, + TargetTransformInfo *TTI, + const DataLayout *DL) { + if (C.CandidateKind == Candidate::Add) + return isAddFoldable(C.Base, C.Index, C.Stride, TTI); + if (C.CandidateKind == Candidate::GEP) + return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI, DL); + return false; +} + +// Returns true if GEP has zero or one non-zero index. +static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) { + unsigned NumNonZeroIndices = 0; + for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) { + ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); + if (ConstIdx == nullptr || !ConstIdx->isZero()) + ++NumNonZeroIndices; + } + return NumNonZeroIndices <= 1; +} + +bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) { + if (C.CandidateKind == Candidate::Add) { + // B + 1 * S or B + (-1) * S + return C.Index->isOne() || C.Index->isMinusOne(); + } + if (C.CandidateKind == Candidate::Mul) { + // (B + 0) * S + return C.Index->isZero(); + } + if (C.CandidateKind == Candidate::GEP) { + // (char*)B + S or (char*)B - S + return ((C.Index->isOne() || C.Index->isMinusOne()) && + hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins))); } + return false; +} +// TODO: We currently implement an algorithm whose time complexity is linear in +// the number of existing candidates. However, we could do better by using +// ScopedHashTable. 
+// Specifically, while traversing the dominator tree, we could
+// maintain all the candidates that dominate the basic block being traversed in
+// a ScopedHashTable. This hash table is indexed by the base and the stride of
+// a candidate. Therefore, finding the immediate basis of a candidate boils down
+// to one hash-table look up.
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+    Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+    Instruction *I) {
   Candidate C(CT, B, Idx, S, I);
-  // Try to compute the immediate basis of C.
-  unsigned NumIterations = 0;
-  // Limit the scan radius to avoid running forever.
-  static const unsigned MaxNumIterations = 50;
-  for (auto Basis = Candidates.rbegin();
-       Basis != Candidates.rend() && NumIterations < MaxNumIterations;
-       ++Basis, ++NumIterations) {
-    if (isBasisFor(*Basis, C)) {
-      C.Basis = &(*Basis);
-      break;
+  // SLSR can complicate an instruction in two cases:
+  //
+  // 1. If we can fold I into an addressing mode, computing I is likely free or
+  // takes only one instruction.
+  //
+  // 2. I is already in its simplest form. For example, when
+  //      X = B + 8 * S
+  //      Y = B + S,
+  //    rewriting Y to X - 7 * S is probably a bad idea.
+  //
+  // In the above cases, we still add I to the candidate list so that I can be
+  // the basis of other candidates, but we leave I's basis blank so that I
+  // won't be rewritten.
+  if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) {
+    // Try to compute the immediate basis of C.
+    unsigned NumIterations = 0;
+    // Limit the scan radius to avoid running in quadratic time.
+    static const unsigned MaxNumIterations = 50;
+    for (auto Basis = Candidates.rbegin();
+         Basis != Candidates.rend() && NumIterations < MaxNumIterations;
+         ++Basis, ++NumIterations) {
+      if (isBasisFor(*Basis, C)) {
+        C.Basis = &(*Basis);
+        break;
+      }
     }
   }
   // Regardless of whether we find a basis for C, we need to push C to the
-  // candidate list.
+  // candidate list so that it can be the basis of other candidates.
   Candidates.push_back(C);
 }
 
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) {
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasis(
+    Instruction *I) {
   switch (I->getOpcode()) {
+  case Instruction::Add:
+    allocateCandidatesAndFindBasisForAdd(I);
+    break;
   case Instruction::Mul:
-    allocateCandidateAndFindBasisForMul(I);
+    allocateCandidatesAndFindBasisForMul(I);
     break;
   case Instruction::GetElementPtr:
-    allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+    allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I));
     break;
   }
 }
 
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd(
+    Instruction *I) {
+  // Try matching B + i * S; a worked example follows.
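A worked example of the Add form (illustrative IR; value names are hypothetical):

    // %s8 = mul i64 %s, 8
    // %x  = add i64 %b, %s8        ; candidate (Add, B = %b, i = 8, S = %s)
    // %s9 = mul i64 %s, 9
    // %y  = add i64 %b, %s9        ; candidate (Add, B = %b, i = 9, S = %s)

If %x dominates %y, then %x is a basis for %y, and %y can be rewritten as %y = add i64 %x, %s, i.e. X + (9 - 8) * S.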
+ if (!isa<IntegerType>(I->getType())) + return; + + assert(I->getNumOperands() == 2 && "isn't I an add?"); + Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); + allocateCandidatesAndFindBasisForAdd(LHS, RHS, I); + if (LHS != RHS) + allocateCandidatesAndFindBasisForAdd(RHS, LHS, I); +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( + Value *LHS, Value *RHS, Instruction *I) { + Value *S = nullptr; + ConstantInt *Idx = nullptr; + if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) { + // I = LHS + RHS = LHS + Idx * S + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); + } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) { + // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx) + APInt One(Idx->getBitWidth(), 1); + Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue()); + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); + } else { + // At least, I = LHS + 1 * RHS + ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1); + allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS, + I); + } +} + +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( Value *LHS, Value *RHS, Instruction *I) { Value *B = nullptr; ConstantInt *Idx = nullptr; @@ -306,54 +411,54 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) { // If LHS is in the form of "Base + Index", then I is in the form of // "(Base + Index) * RHS". - allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); + allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); } else { // Otherwise, at least try the form (LHS + 0) * RHS. ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); - allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, + allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, I); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( Instruction *I) { // Try matching (B + i) * S. // TODO: we could extend SLSR to float and vector types. if (!isa<IntegerType>(I->getType())) return; + assert(I->getNumOperands() == 2 && "isn't I a mul?"); Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); - allocateCandidateAndFindBasisForMul(LHS, RHS, I); + allocateCandidatesAndFindBasisForMul(LHS, RHS, I); if (LHS != RHS) { // Symmetrically, try to split RHS to Base + Index. - allocateCandidateAndFindBasisForMul(RHS, LHS, I); + allocateCandidatesAndFindBasisForMul(RHS, LHS, I); } } -void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP( +void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, Instruction *I) { - // I = B + sext(Idx *nsw S) *nsw ElementSize + // I = B + sext(Idx *nsw S) * ElementSize + // = B + (sext(Idx) * sext(S)) * ElementSize // = B + (sext(Idx) * ElementSize) * sext(S) // Casting to IntegerType is safe because we skipped vector GEPs. 
   IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
   ConstantInt *ScaledIdx = ConstantInt::get(
       IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
-  allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+  allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
 }
 
 void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
                                                   const SCEV *Base,
                                                   uint64_t ElementSize,
                                                   GetElementPtrInst *GEP) {
-  // At least, ArrayIdx = ArrayIdx *s 1.
-  allocateCandidateAndFindBasisForGEP(
+  // At least, ArrayIdx = ArrayIdx *nsw 1.
+  allocateCandidatesAndFindBasisForGEP(
       Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
       ArrayIdx, ElementSize, GEP);
   Value *LHS = nullptr;
   ConstantInt *RHS = nullptr;
-  // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4).
-  //
   // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
   // itself. This would allow us to handle the shl case for free. However,
   // matching SCEVs has two issues:
@@ -367,12 +472,19 @@ void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
   // sext'ed multiplication.
   if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
     // SLSR is currently unsafe if i * S may overflow.
-    // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize
-    allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+    // GEP = Base + sext(LHS *nsw RHS) * ElementSize
+    allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+  } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) {
+    // GEP = Base + sext(LHS <<nsw RHS) * ElementSize
+    //     = Base + sext(LHS *nsw (1 << RHS)) * ElementSize
+    APInt One(RHS->getBitWidth(), 1);
+    ConstantInt *PowerOf2 =
+        ConstantInt::get(RHS->getContext(), One << RHS->getValue());
+    allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP);
   }
 }
 
-void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
     GetElementPtrInst *GEP) {
   // TODO: handle vector GEPs
   if (GEP->getType()->isVectorTy())
@@ -436,6 +548,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
     else
       BumpWithUglyGEP = true;
   }
+
   // Compute Bump = C - Basis = (i' - i) * S.
   // Common case 1: if (i' - i) is 1, Bump = S.
   if (IndexOffset.getSExtValue() == 1)
@@ -443,9 +556,24 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
   // Common case 2: if (i' - i) is -1, Bump = -S.
   if (IndexOffset.getSExtValue() == -1)
     return Builder.CreateNeg(C.Stride);
-  // Otherwise, Bump = (i' - i) * sext/trunc(S).
-  ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset);
-  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType());
+
+  // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
+  // have different bit widths.
+  IntegerType *DeltaType =
+      IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth());
+  Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType);
+  if (IndexOffset.isPowerOf2()) {
+    // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i).
+    ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
+    return Builder.CreateShl(ExtendedStride, Exponent);
+  }
+  if ((-IndexOffset).isPowerOf2()) {
+    // If (i - i') is a power of 2, Bump = -(sext/trunc(S) << log(i - i')).
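For concreteness, a small worked example of these two power-of-two cases (illustrative values only):

    // e.g. i = 1, i' = 5: IndexOffset = 4,  so Bump = S << 2
    //      i = 5, i' = 1: IndexOffset = -4, so Bump = -(S << 2)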
+ ConstantInt *Exponent = + ConstantInt::get(DeltaType, (-IndexOffset).logBase2()); + return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent)); + } + Constant *Delta = ConstantInt::get(DeltaType, IndexOffset); return Builder.CreateMul(ExtendedStride, Delta); } @@ -453,6 +581,9 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( const Candidate &C, const Candidate &Basis) { assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && C.Stride == Basis.Stride); + // We run rewriteCandidateWithBasis on all candidates in a post-order, so the + // basis of a candidate cannot be unlinked before the candidate. + assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked"); // An instruction can correspond to multiple candidates. Therefore, instead of // simply deleting an instruction when we rewrite it, we mark its parent as @@ -466,25 +597,38 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP); Value *Reduced = nullptr; // equivalent to but weaker than C.Ins switch (C.CandidateKind) { + case Candidate::Add: case Candidate::Mul: - Reduced = Builder.CreateAdd(Basis.Ins, Bump); + if (BinaryOperator::isNeg(Bump)) { + Reduced = + Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump)); + } else { + Reduced = Builder.CreateAdd(Basis.Ins, Bump); + } break; case Candidate::GEP: { Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); + bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); if (BumpWithUglyGEP) { // C = (char *)Basis + Bump unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - // We only considered inbounds GEP as candidates. - Reduced = Builder.CreateInBoundsGEP(Reduced, Bump); + if (InBounds) + Reduced = + Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump); + else + Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump); Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); } else { // C = gep Basis, Bump // Canonicalize bump to pointer size. Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump); + if (InBounds) + Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump); + else + Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump); } } break; @@ -497,7 +641,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis( // Unlink C.Ins so that we can skip other candidates also corresponding to // C.Ins. The actual deletion is postponed to the end of runOnFunction. C.Ins->removeFromParent(); - UnlinkedInstructions.insert(C.Ins); + UnlinkedInstructions.push_back(C.Ins); } bool StraightLineStrengthReduce::runOnFunction(Function &F) { @@ -512,7 +656,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT); node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) { for (auto &I : *node->getBlock()) - allocateCandidateAndFindBasis(&I); + allocateCandidatesAndFindBasis(&I); } // Rewrite candidates in the reverse depth-first order. This order makes sure diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 6c3ce58..4f23e20 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -887,7 +887,7 @@ void StructurizeCFG::createFlow() { /// no longer dominate all their uses. 
Not sure if this is really nessasary void StructurizeCFG::rebuildSSA() { SSAUpdater Updater; - for (const auto &BB : ParentRegion->blocks()) + for (auto *BB : ParentRegion->blocks()) for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index 820544b..c1cd39a 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -174,42 +174,51 @@ bool AddDiscriminators::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *B = I; TerminatorInst *Last = B->getTerminator(); - DebugLoc LastLoc = Last->getDebugLoc(); - if (LastLoc.isUnknown()) continue; - DILocation LastDIL(LastLoc.getAsMDNode(Ctx)); + DILocation LastDIL = Last->getDebugLoc().get(); + if (!LastDIL) + continue; for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { BasicBlock *Succ = Last->getSuccessor(I); Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - DebugLoc FirstLoc = First->getDebugLoc(); - if (FirstLoc.isUnknown()) continue; - DILocation FirstDIL(FirstLoc.getAsMDNode(Ctx)); + DILocation FirstDIL = First->getDebugLoc().get(); + if (!FirstDIL) + continue; // If the first instruction (First) of Succ is at the same file // location as B's last instruction (Last), add a new // discriminator for First's location and all the instructions // in Succ that share the same location with First. - if (FirstDIL.atSameLineAs(LastDIL)) { + if (!FirstDIL->canDiscriminate(*LastDIL)) { // Create a new lexical scope and compute a new discriminator // number for it. - StringRef Filename = FirstDIL.getFilename(); - DIScope Scope = FirstDIL.getScope(); - DIFile File = Builder.createFile(Filename, Scope.getDirectory()); - unsigned Discriminator = FirstDIL.computeNewDiscriminator(Ctx); + StringRef Filename = FirstDIL->getFilename(); + auto *Scope = FirstDIL->getScope(); + DIFile File = Builder.createFile(Filename, Scope->getDirectory()); + + // FIXME: Calculate the discriminator here, based on local information, + // and delete MDLocation::computeNewDiscriminator(). The current + // solution gives different results depending on other modules in the + // same context. All we really need is to discriminate between + // FirstDIL and LastDIL -- a local map would suffice. + unsigned Discriminator = FirstDIL->computeNewDiscriminator(); DILexicalBlockFile NewScope = Builder.createLexicalBlockFile(Scope, File, Discriminator); - DILocation NewDIL = FirstDIL.copyWithNewScope(Ctx, NewScope); - DebugLoc newDebugLoc = DebugLoc::getFromDILocation(NewDIL); + auto *NewDIL = + MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), + NewScope, FirstDIL->getInlinedAt()); + DebugLoc newDebugLoc = NewDIL; // Attach this new debug location to First and every // instruction following First that shares the same location. 
for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; ++I1) { - if (I1->getDebugLoc() != FirstLoc) break; + if (I1->getDebugLoc().get() != FirstDIL) + break; I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL.getFilename() << ":" << NewDIL.getLineNumber() - << ":" << NewDIL.getColumnNumber() << ":" - << NewDIL.getDiscriminator() << *I1 << "\n"); + DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() + << ":" << NewDIL->getColumn() << ":" + << NewDIL->getDiscriminator() << *I1 << "\n"); } DEBUG(dbgs() << "\n"); Changed = true; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index f04ea9c..f200b58 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -157,20 +157,21 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Find the MDNode which corresponds to the DISubprogram data that described F. static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { for (DISubprogram Subprogram : Finder.subprograms()) { - if (Subprogram.describes(F)) return Subprogram; + if (Subprogram->describes(F)) + return Subprogram; } return nullptr; } // Add an operand to an existing MDNode. The new operand will be added at the // back of the operand list. -static void AddOperand(DICompileUnit CU, DIArray SPs, Metadata *NewSP) { +static void AddOperand(DICompileUnit CU, MDSubprogramArray SPs, Metadata *NewSP) { SmallVector<Metadata *, 16> NewSPs; - NewSPs.reserve(SPs->getNumOperands() + 1); - for (unsigned I = 0, E = SPs->getNumOperands(); I != E; ++I) - NewSPs.push_back(SPs->getOperand(I)); + NewSPs.reserve(SPs.size() + 1); + for (auto *SP : SPs) + NewSPs.push_back(SP); NewSPs.push_back(NewSP); - CU.replaceSubprograms(DIArray(MDNode::get(CU->getContext(), NewSPs))); + CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs)); } // Clone the module-level debug info associated with OldFunc. The cloned data @@ -186,15 +187,15 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, // Ensure that OldFunc appears in the map. // (if it's already there it must point to NewFunc anyway) VMap[OldFunc] = NewFunc; - DISubprogram NewSubprogram(MapMetadata(OldSubprogramMDNode, VMap)); + DISubprogram NewSubprogram = + cast<MDSubprogram>(MapMetadata(OldSubprogramMDNode, VMap)); for (DICompileUnit CU : Finder.compile_units()) { - DIArray Subprograms(CU.getSubprograms()); - + auto Subprograms = CU->getSubprograms(); // If the compile unit's function list contains the old function, it should // also contain the new one. - for (unsigned i = 0; i < Subprograms.getNumElements(); i++) { - if ((MDNode*)Subprograms.getElement(i) == OldSubprogramMDNode) { + for (auto *SP : Subprograms) { + if (SP == OldSubprogramMDNode) { AddOperand(CU, Subprograms, NewSubprogram); break; } @@ -395,7 +396,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Action == CloningDirector::CloneSuccessors) { // If the director says to skip with a terminate instruction, we still // need to clone this block's successors. 
- const TerminatorInst *TI = BB->getTerminator(); + const TerminatorInst *TI = NewBB->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) ToClone.push_back(TI->getSuccessor(i)); return; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 52e2d59..44b7d25 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -150,7 +150,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, if (MSI->isVolatile()) return true; GS.StoredType = GlobalStatus::Stored; - } else if (ImmutableCallSite C = I) { + } else if (auto C = ImmutableCallSite(I)) { if (!C.isCallee(&U)) return true; GS.IsLoaded = true; diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index df3e1d4..a08ffbe 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -835,11 +835,10 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, DenseMap<const MDLocation *, MDLocation *> &IANodes) { SmallVector<MDLocation*, 3> InlinedAtLocations; MDLocation *Last = InlinedAtNode; - DebugLoc CurInlinedAt = DL; + MDLocation *CurInlinedAt = DL; // Gather all the inlined-at nodes - while (MDLocation *IA = - cast_or_null<MDLocation>(CurInlinedAt.getInlinedAt(Ctx))) { + while (MDLocation *IA = CurInlinedAt->getInlinedAt()) { // Skip any we've already built nodes for if (MDLocation *Found = IANodes[IA]) { Last = Found; @@ -847,7 +846,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, } InlinedAtLocations.push_back(IA); - CurInlinedAt = DebugLoc::getFromDILocation(IA); + CurInlinedAt = IA; } // Starting from the top, rebuild the nodes to point to the new inlined-at @@ -862,7 +861,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, // And finally create the normal location for this instruction, referring to // the new inlined-at chain. - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last); + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); } /// Update inlined instructions' line numbers to @@ -870,11 +869,11 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode, static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); - if (TheCallDL.isUnknown()) + if (!TheCallDL) return; auto &Ctx = Fn->getContext(); - auto *InlinedAtNode = cast<MDLocation>(TheCallDL.getAsMDNode(Ctx)); + MDLocation *InlinedAtNode = TheCallDL; // Create a unique call site, not to be confused with any other call from the // same location. @@ -891,7 +890,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (DL.isUnknown()) { + if (!DL) { // If the inlined instruction has no line number, make it look as if it // originates from the call location. 
This is important for // ((__always_inline__, __nodebug__)) functions which must use caller @@ -905,19 +904,6 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, BI->setDebugLoc(TheCallDL); } else { BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); - if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DVI->setOperand(2, MetadataAsValue::get( - Ctx, createInlinedVariable(DVI->getVariable(), - InlinedAt, Ctx))); - } else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { - LLVMContext &Ctx = BI->getContext(); - MDNode *InlinedAt = BI->getDebugLoc().getInlinedAt(Ctx); - DDI->setOperand(1, MetadataAsValue::get( - Ctx, createInlinedVariable(DDI->getVariable(), - InlinedAt, Ctx))); - } } } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bd15f9e..1c9760e 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) { /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. Argument *ExtendedArg = nullptr; @@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); if (ExtendedArg) - DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI); + Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); else - DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, - DIExpr, SI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), SI); return true; } @@ -1029,19 +1026,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, /// that has an associated llvm.dbg.decl intrinsic. bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, LoadInst *LI, DIBuilder &Builder) { - DIVariable DIVar(DDI->getVariable()); - DIExpression DIExpr(DDI->getExpression()); - assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); + DIVariable DIVar = DDI->getVariable(); + DIExpression DIExpr = DDI->getExpression(); if (!DIVar) return false; if (LdStHasDebugValue(DIVar, LI)) return true; - Instruction *DbgVal = - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI); - DbgVal->setDebugLoc(DDI->getDebugLoc()); + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, + DDI->getDebugLoc(), LI); return true; } @@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. 
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index bd15f9e..1c9760e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -998,17 +998,14 @@ static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            StoreInst *SI, DIBuilder &Builder) {
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;

   if (LdStHasDebugValue(DIVar, SI))
     return true;

-  Instruction *DbgVal = nullptr;
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
   Argument *ExtendedArg = nullptr;
@@ -1017,11 +1014,11 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
     ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
   if (ExtendedArg)
-    DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr, SI);
+    Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+                                    DDI->getDebugLoc(), SI);
   else
-    DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar,
-                                             DIExpr, SI);
-  DbgVal->setDebugLoc(DDI->getDebugLoc());
+    Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
+                                    DDI->getDebugLoc(), SI);
   return true;
 }

@@ -1029,19 +1026,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            LoadInst *LI, DIBuilder &Builder) {
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;

   if (LdStHasDebugValue(DIVar, LI))
     return true;

-  Instruction *DbgVal =
-      Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, LI);
-  DbgVal->setDebugLoc(DDI->getDebugLoc());
+  Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
+                                  DDI->getDebugLoc(), LI);
   return true;
 }

@@ -1083,10 +1077,9 @@ bool llvm::LowerDbgDeclare(Function &F) {
         // This is a call by-value or some other instruction that
         // takes a pointer to the variable. Insert a *value*
         // intrinsic that describes the alloca.
-        auto DbgVal = DIB.insertDbgValueIntrinsic(
-            AI, 0, DIVariable(DDI->getVariable()),
-            DIExpression(DDI->getExpression()), CI);
-        DbgVal->setDebugLoc(DDI->getDebugLoc());
+        DIB.insertDbgValueIntrinsic(AI, 0, DIVariable(DDI->getVariable()),
+                                    DIExpression(DDI->getExpression()),
+                                    DDI->getDebugLoc(), CI);
       }
       DDI->eraseFromParent();
     }
@@ -1112,10 +1105,8 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
   if (!DDI)
     return false;
   DebugLoc Loc = DDI->getDebugLoc();
-  DIVariable DIVar(DDI->getVariable());
-  DIExpression DIExpr(DDI->getExpression());
-  assert((!DIVar || DIVar.isVariable()) &&
-         "Variable in DbgDeclareInst should be either null or a DIVariable.");
+  DIVariable DIVar = DDI->getVariable();
+  DIExpression DIExpr = DDI->getExpression();
   if (!DIVar)
     return false;

@@ -1127,16 +1118,14 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
     SmallVector<uint64_t, 4> NewDIExpr;
     NewDIExpr.push_back(dwarf::DW_OP_deref);
     if (DIExpr)
-      for (unsigned i = 0, n = DIExpr.getNumElements(); i < n; ++i)
-        NewDIExpr.push_back(DIExpr.getElement(i));
+      NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
     DIExpr = Builder.createExpression(NewDIExpr);
   }

   // Insert llvm.dbg.declare in the same basic block as the original alloca,
   // and remove old llvm.dbg.declare.
   BasicBlock *BB = AI->getParent();
-  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, BB)
-      ->setDebugLoc(Loc);
+  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
   DDI->eraseFromParent();
   return true;
 }
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 6b3aa02..1dbce47 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -146,6 +146,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
 /// Similarly, TripMultiple divides the number of times that the LatchBlock may
 /// execute without exiting the loop.
 ///
+/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that
+/// have a runtime (i.e. not compile time constant) trip count. Unrolling these
+/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count"
+/// iterations before branching into the unrolled loop. UnrollLoop will not
+/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
+/// AllowExpensiveTripCount is false.
+///
 /// The LoopInfo Analysis that is passed will be kept consistent.
 ///
 /// If a LoopPassManager is passed in, and the loop is fully removed, it will be
@@ -154,8 +161,9 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM,
 /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
 /// available from the Pass it must also preserve those analyses.
 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
-                      bool AllowRuntime, unsigned TripMultiple, LoopInfo *LI,
-                      Pass *PP, LPPassManager *LPM, AssumptionCache *AC) {
+                      bool AllowRuntime, bool AllowExpensiveTripCount,
+                      unsigned TripMultiple, LoopInfo *LI, Pass *PP,
+                      LPPassManager *LPM, AssumptionCache *AC) {
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -218,7 +226,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   // flag is specified.
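The new doc comment describes the prologue that runtime unrolling needs. In source terms, unrolling a runtime-trip-count loop by Count = 4 produces the shape below; this is an illustrative C++ rendering of what UnrollLoop emits as IR, not LLVM code:

    #include <cstddef>

    void saxpy(float a, const float *x, float *y, size_t n) {
      size_t prologue = n % 4; // "RuntimeTripCount % Count" from the comment
      size_t i = 0;
      for (; i < prologue; ++i) // prologue: peel off the remainder iterations
        y[i] += a * x[i];
      for (; i < n; i += 4) {   // main loop: trip count now divisible by 4
        y[i] += a * x[i];
        y[i + 1] += a * x[i + 1];
        y[i + 2] += a * x[i + 2];
        y[i + 3] += a * x[i + 3];
      }
    }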
   bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

-  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+  if (RuntimeTripCount &&
+      !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM))
     return false;

   // Notify ScalarEvolution that the loop will be substantially changed,
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 381d8fc..d1774df 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -278,7 +278,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
 /// ...
 /// End:
 ///
-bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
+                                   bool AllowExpensiveTripCount, LoopInfo *LI,
                                    LPPassManager *LPM) {
   // for now, only unroll loops that contain a single exit
   if (!L->getExitingBlock())
@@ -312,15 +313,20 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
   if (isa<SCEVCouldNotCompute>(TripCountSC))
     return false;

+  BasicBlock *Header = L->getHeader();
+  const DataLayout &DL = Header->getModule()->getDataLayout();
+  SCEVExpander Expander(*SE, DL, "loop-unroll");
+  if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+    return false;
+
   // We only handle cases when the unroll factor is a power of 2.
   // Count is the loop unroll factor, the number of extra copies added + 1.
   if (!isPowerOf2_32(Count))
     return false;

   // This constraint lets us deal with an overflowing trip count easily; see the
-  // comment on ModVal below. This check is equivalent to `Log2(Count) <
-  // BEWidth`.
-  if (static_cast<uint64_t>(Count) > (1ULL << BEWidth))
+  // comment on ModVal below.
+  if (Log2_32(Count) > BEWidth)
     return false;

   // If this loop is nested, then the loop unroller changes the code in
@@ -333,18 +339,15 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

   BasicBlock *PH = L->getLoopPreheader();
-  BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   // It helps to splits the original preheader twice, one for the end of the
   // prolog code and one for a new loop preheader
   BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
   BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
   BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
-  const DataLayout &DL = Header->getModule()->getDataLayout();

   // Compute the number of extra iterations required, which is:
   //  extra iterations = run-time trip count % (loop unroll factor + 1)
-  SCEVExpander Expander(*SE, DL, "loop-unroll");
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                             PreHeaderBR);
   Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
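The rewritten overflow guard is the same test in log space: for a power-of-two Count, `Count > (1 << BEWidth)` and `Log2(Count) > BEWidth` agree, but the shift form is undefined once BEWidth reaches 64. A small standalone check of that equivalence (log2u32 stands in for llvm::Log2_32; values are illustrative):

    #include <cassert>
    #include <cstdint>

    // Floor log2 of a nonzero 32-bit value, as an illustration of Log2_32.
    static unsigned log2u32(uint32_t V) {
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    int main() {
      uint32_t Count = 8;   // power-of-two unroll factor
      uint64_t BEWidth = 2; // width of the backedge-taken count
      // Both forms reject: 8 > 4, and log2(8) = 3 > 2.
      assert((Count > (1ULL << BEWidth)) == (log2u32(Count) > BEWidth));
      // The log form stays defined for BEWidth >= 64 (e.g. an i128 count),
      // where 1ULL << BEWidth would be undefined behavior.
      return 0;
    }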
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 35c701e..014574d 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"

 using namespace llvm;

@@ -93,3 +94,13 @@ llvm::collectUsedGlobalVariables(Module &M, SmallPtrSetImpl<GlobalValue *> &Set,
   }
   return GV;
 }
+
+Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
+  if (isa<Function>(FuncOrBitcast))
+    return cast<Function>(FuncOrBitcast);
+  FuncOrBitcast->dump();
+  std::string Err;
+  raw_string_ostream Stream(Err);
+  Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
+  report_fatal_error(Err);
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 4b34b19..54e1733 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -872,8 +872,10 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
   }

   SmallVector<std::pair<unsigned, BasicBlock *>, 32> DFBlocks;
-  SmallPtrSet<DomTreeNode *, 32> Visited;
   SmallVector<DomTreeNode *, 32> Worklist;
+  SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+  SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
   while (!PQ.empty()) {
     DomTreeNodePair RootPair = PQ.top();
     PQ.pop();
@@ -887,6 +889,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,

     Worklist.clear();
     Worklist.push_back(Root);
+    VisitedWorklist.insert(Root);

     while (!Worklist.empty()) {
       DomTreeNode *Node = Worklist.pop_back_val();
@@ -905,7 +908,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
         if (SuccLevel > RootLevel)
           continue;

-        if (!Visited.insert(SuccNode).second)
+        if (!VisitedPQ.insert(SuccNode).second)
           continue;

         BasicBlock *SuccBB = SuccNode->getBlock();
@@ -919,7 +922,7 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,

       for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
            ++CI) {
-        if (!Visited.count(*CI))
+        if (VisitedWorklist.insert(*CI).second)
           Worklist.push_back(*CI);
       }
     }
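The mem2reg hunk splits one shared Visited set into VisitedPQ (guarding dominance-frontier candidates) and VisitedWorklist (guarding the dominator-tree walk), and switches the push site to an insert-and-test. A generic sketch of that worklist idiom, with nothing LLVM-specific (Node and walk are hypothetical):

    #include <set>
    #include <vector>

    struct Node {
      std::vector<Node *> Children;
    };

    // A dedicated visited set per traversal: each node is pushed at most once,
    // and marking a node here cannot suppress it in some other traversal that
    // keeps its own set -- the separation the hunk above introduces.
    static void walk(Node *Root) {
      std::set<Node *> VisitedWalk;
      std::vector<Node *> Worklist{Root};
      VisitedWalk.insert(Root);
      while (!Worklist.empty()) {
        Node *N = Worklist.back();
        Worklist.pop_back();
        for (Node *C : N->Children)
          if (VisitedWalk.insert(C).second) // true only on first insertion
            Worklist.push_back(C);
      }
    }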
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c7c0ca6..7c239cb 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1502,7 +1502,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
     if (isa<DbgInfoIntrinsic>(I))
       continue;

-    // Only speculatively execution a single instruction (not counting the
+    // Only speculatively execute a single instruction (not counting the
     // terminator) for now.
     ++SpeculationCost;
     if (SpeculationCost > 1)
@@ -3884,8 +3884,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
                                     "switch.tableidx.zext");

     Value *GEPIndices[] = { Builder.getInt32(0), Index };
-    Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
-                                           "switch.gep");
+    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
+                                           GEPIndices, "switch.gep");
     return Builder.CreateLoad(GEP, "switch.load");
   }
 }
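This hunk and the SimplifyLibCalls hunks that follow are part of one API migration: GEP creation now takes the source element type explicitly instead of deriving it from the pointer operand (groundwork for opaque pointers). A minimal sketch of the two builder forms, assuming the LLVM 3.7-era IRBuilder where both overloads coexist (helper names are illustrative; Str is assumed to be an i8* value):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Both helpers compute &Str[1]; only the second states the element type.
    static Value *gepOld(IRBuilder<> &B, Value *Str) {
      return B.CreateGEP(Str, B.getInt64(1));                // type inferred
    }
    static Value *gepNew(IRBuilder<> &B, Value *Str) {
      return B.CreateGEP(B.getInt8Ty(), Str, B.getInt64(1)); // type explicit
    }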
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 5867d65..42102e7 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -222,7 +222,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
   // Now that we have the destination's length, we must index into the
   // destination's pointer to get the actual memcpy destination (end of
   // the string .. we're concatenating).
-  Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+  Value *CpyDst = B.CreateGEP(B.getInt8Ty(), Dst, DstLen, "endptr");

   // We have enough information to now generate the memcpy call to do the
   // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
@@ -303,7 +303,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
   StringRef Str;
   if (!getConstantStringInfo(SrcStr, Str)) {
     if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
-      return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
+      return B.CreateGEP(B.getInt8Ty(), SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
     return nullptr;
   }

@@ -316,7 +316,7 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());

   // strchr(s+n,c)  -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strchr");
 }

 Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
@@ -351,7 +351,7 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());

   // strrchr(s+n,c) -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "strrchr");
 }

 Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
@@ -476,7 +476,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
   if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
   }

   // See if we can get the length of the input string.
@@ -487,7 +487,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
   Type *PT = FT->getParamType(0);
   Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
   Value *DstEnd =
-      B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
+      B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));

   // We have enough information to now generate the memcpy call to do the
   // copy for us.  Make a memcpy to copy the nul byte with align = 1.
@@ -597,7 +597,7 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
     if (I == StringRef::npos) // No match.
       return Constant::getNullValue(CI->getType());

-    return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk");
+    return B.CreateGEP(B.getInt8Ty(), CI->getArgOperand(0), B.getInt64(I), "strpbrk");
   }

   // strpbrk(s, "a") -> strchr(s, 'a')
@@ -828,7 +828,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
     return Constant::getNullValue(CI->getType());

   // memchr(s+n,c,l) -> gep(s+n+i,c)
-  return B.CreateGEP(SrcStr, B.getInt64(I), "memchr");
+  return B.CreateGEP(B.getInt8Ty(), SrcStr, B.getInt64(I), "memchr");
 }

 Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
@@ -1671,7 +1671,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
     Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
     Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
     B.CreateStore(V, Ptr);
-    Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+    Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul");
     B.CreateStore(B.getInt8(0), Ptr);

     return ConstantInt::get(CI->getType(), 1);
@@ -2276,7 +2276,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // __stpcpy_chk(x,x,...)  -> x+strlen(x)
   if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
     Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
   }

   // If a) we don't have any length information, or b) we know this will
@@ -2284,25 +2284,25 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
   // st[rp]cpy_chk call which may fail at runtime if the size is too long.
   // TODO: It might be nice to get a maximum length out of the possible
   // string lengths for varying.
-  if (isFortifiedCallFoldable(CI, 2, 1, true)) {
-    Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
-    return Ret;
-  } else if (!OnlyLowerUnknownSize) {
-    // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
-    uint64_t Len = GetStringLength(Src);
-    if (Len == 0)
-      return nullptr;
+  if (isFortifiedCallFoldable(CI, 2, 1, true))
+    return EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));

-    Type *SizeTTy = DL.getIntPtrType(CI->getContext());
-    Value *LenV = ConstantInt::get(SizeTTy, Len);
-    Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
-    // If the function was an __stpcpy_chk, and we were able to fold it into
-    // a __memcpy_chk, we still need to return the correct end pointer.
-    if (Ret && Func == LibFunc::stpcpy_chk)
-      return B.CreateGEP(Dst, ConstantInt::get(SizeTTy, Len - 1));
-    return Ret;
-  }
-  return nullptr;
+  if (OnlyLowerUnknownSize)
+    return nullptr;
+
+  // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
+  uint64_t Len = GetStringLength(Src);
+  if (Len == 0)
+    return nullptr;
+
+  Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+  Value *LenV = ConstantInt::get(SizeTTy, Len);
+  Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
+  // If the function was an __stpcpy_chk, and we were able to fold it into
+  // a __memcpy_chk, we still need to return the correct end pointer.
+  if (Ret && Func == LibFunc::stpcpy_chk)
+    return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
+  return Ret;
 }

 Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
@@ -2322,8 +2322,18 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
 }

 Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
-  if (CI->isNoBuiltin())
-    return nullptr;
+  // FIXME: We shouldn't be changing "nobuiltin" or TLI unavailable calls here.
+  // Some clang users checked for _chk libcall availability using:
+  //   __has_builtin(__builtin___memcpy_chk)
+  // When compiling with -fno-builtin, this is always true.
+  // When passing -ffreestanding/-mkernel, which both imply -fno-builtin, we
+  // end up with fortified libcalls, which isn't acceptable in a freestanding
+  // environment which only provides their non-fortified counterparts.
+  //
+  // Until we change clang and/or teach external users to check for availability
+  // differently, disregard the "nobuiltin" attribute and TLI::has.
+  //
+  // PR23093.

   LibFunc::Func Func;
   Function *Callee = CI->getCalledFunction();
@@ -2332,7 +2342,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
   bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;

   // First, check that this is a known library functions.
-  if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func))
+  if (!TLI->getLibFunc(FuncName, Func))
     return nullptr;

   // We never change the calling convention.
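In source terms, the __st[rp]cpy_chk folding above turns a fortified call with a known-constant source into a plain copy of a known length. A hedged C illustration of the before/after (the fortified builtins are compiler-provided; the exact lowering varies by target and flags):

    #include <string.h>

    char dst[16];

    /* What a front end emits for a fortified strcpy of a known string... */
    char *before(void) {
      return __builtin___strcpy_chk(dst, "hi", __builtin_object_size(dst, 0));
    }

    /* ...and roughly what the simplifier folds it to: the length (2 chars
     * plus the nul, 3 bytes) is a compile-time constant, so the runtime
     * size check can become a fixed-size copy. */
    char *after(void) {
      memcpy(dst, "hi", 3);
      return dst;
    }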
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index b7d0ae4..8986932 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -93,6 +93,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/VectorUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
 #include <map>
 #include <tuple>
@@ -503,8 +504,7 @@ static std::string getDebugLocString(const Loop *L) {
   std::string Result;
   if (L) {
     raw_string_ostream OS(Result);
-    const DebugLoc LoopDbgLoc = L->getStartLoc();
-    if (!LoopDbgLoc.isUnknown())
+    if (const DebugLoc LoopDbgLoc = L->getStartLoc())
       LoopDbgLoc.print(OS);
     else
       // Just print the module name.
@@ -686,7 +686,7 @@ public:
       Index = B.CreateNeg(Index);
     else if (!StepValue->isOne())
       Index = B.CreateMul(Index, StepValue);
-    return B.CreateGEP(StartValue, Index);
+    return B.CreateGEP(nullptr, StartValue, Index);

   case IK_NoInduction:
     return nullptr;
@@ -1839,7 +1839,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {

     for (unsigned Part = 0; Part < UF; ++Part) {
       // Calculate the pointer for the specific unroll-part.
-      Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+      Value *PartPtr =
+          Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

       if (Reverse) {
         // If we store to reverse consecutive memory locations then we need
@@ -1847,8 +1848,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
         StoredVal[Part] = reverseVector(StoredVal[Part]);
         // If the address is consecutive but reversed, then the
         // wide store needs to start at the last vector element.
-        PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
-        PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+        PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+        PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
         Mask[Part] = reverseVector(Mask[Part]);
       }

@@ -1871,13 +1872,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
   setDebugLocFromInst(Builder, LI);
   for (unsigned Part = 0; Part < UF; ++Part) {
     // Calculate the pointer for the specific unroll-part.
-    Value *PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(Part * VF));
+    Value *PartPtr =
+        Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF));

     if (Reverse) {
       // If the address is consecutive but reversed, then the
       // wide load needs to start at the last vector element.
-      PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
-      PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
+      PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(-Part * VF));
+      PartPtr = Builder.CreateGEP(nullptr, PartPtr, Builder.getInt32(1 - VF));
       Mask[Part] = reverseVector(Mask[Part]);
     }

@@ -4007,6 +4009,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   if (!LAI->canVectorizeMemory())
     return false;

+  if (LAI->hasStoreToLoopInvariantAddress()) {
+    emitAnalysis(
+        VectorizationReport()
+        << "write to a loop invariant address could not be vectorized");
+    DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
+    return false;
+  }
+
   if (LAI->getNumRuntimePointerChecks() >
       VectorizerParams::RuntimeMemoryCheckThreshold) {
     emitAnalysis(VectorizationReport()
@@ -4307,32 +4317,31 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
   }
 }

-LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
-                                               ConstantInt *&StepValue) {
+bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+                          ConstantInt *&StepValue) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer inductions variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
-    return IK_NoInduction;
+    return false;

   // Check that the PHI is consecutive.
   const SCEV *PhiScev = SE->getSCEV(Phi);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
   if (!AR) {
     DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
-    return IK_NoInduction;
+    return false;
   }

   const SCEV *Step = AR->getStepRecurrence(*SE);
   // Calculate the pointer stride and check if it is consecutive.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
   if (!C)
-    return IK_NoInduction;
+    return false;

   ConstantInt *CV = C->getValue();
   if (PhiTy->isIntegerTy()) {
     StepValue = CV;
-    return IK_IntInduction;
+    return true;
   }

   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
@@ -4340,14 +4349,28 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
   // The pointer stride cannot be determined if the pointer element type is not
   // sized.
   if (!PointerElementType->isSized())
-    return IK_NoInduction;
+    return false;

   const DataLayout &DL = Phi->getModule()->getDataLayout();
   int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
   int64_t CVSize = CV->getSExtValue();
   if (CVSize % Size)
-    return IK_NoInduction;
+    return false;
   StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
+  return true;
+}
+
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
+                                               ConstantInt *&StepValue) {
+  if (!isInductionPHI(Phi, SE, StepValue))
+    return IK_NoInduction;
+
+  Type *PhiTy = Phi->getType();
+  // Found an Integer induction variable.
+  if (PhiTy->isIntegerTy())
+    return IK_IntInduction;
+  // Found an Pointer induction variable.
   return IK_PtrInduction;
 }

diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8fc4cc1..7267f58 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1183,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
     case Instruction::ICmp:
     case Instruction::FCmp: {
       // Check that all of the compares have the same predicate.
-      CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Type *ComparedTy = cast<Instruction>(VL[0])->getOperand(0)->getType();
       for (unsigned i = 1, e = VL.size(); i < e; ++i) {
         CmpInst *Cmp = cast<CmpInst>(VL[i]);
@@ -2202,7 +2202,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       if (Value *V = alreadyVectorized(E->Scalars))
         return V;

-      CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate();
+      CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Value *V;
       if (Opcode == Instruction::FCmp)
         V = Builder.CreateFCmp(P0, L, R);
@@ -3101,9 +3101,7 @@ struct SLPVectorizer : public FunctionPass {
     // delete instructions.

     // Scan the blocks in the function in post order.
-    for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
-         e = po_end(&F.getEntryBlock()); it != e; ++it) {
-      BasicBlock *BB = *it;
+    for (auto BB : post_order(&F.getEntryBlock())) {
       // Vectorize trees that end at stores.
       if (unsigned count = collectStores(BB, R)) {
         (void)count;
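The last hunk swaps a manual po_begin/po_end iterator pair for the post_order() range adaptor from llvm/ADT/PostOrderIterator.h. A minimal sketch of the adaptor in use (standalone helper with an illustrative name):

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Count the blocks reachable from the entry, visiting them in post order,
    // with the same range-for shape as the SLPVectorizer hunk above.
    static unsigned countBlocksPostOrder(Function &F) {
      unsigned N = 0;
      for (BasicBlock *BB : post_order(&F.getEntryBlock())) {
        (void)BB; // each reachable block is visited exactly once
        ++N;
      }
      return N;
    }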