14 files changed, 666 insertions, 38 deletions
diff --git a/courgette/adjustment_method_unittest.cc b/courgette/adjustment_method_unittest.cc
index 5e92c3e..5b213dc 100644
--- a/courgette/adjustment_method_unittest.cc
+++ b/courgette/adjustment_method_unittest.cc
@@ -26,7 +26,8 @@ class AdjustmentMethodTest : public testing::Test {
   // Returns one of two similar a simple programs.  They differ only in the
   // label assignment, so that it is possible to make them look identical.
   courgette::AssemblyProgram* MakeProgram(int kind) const {
-    courgette::AssemblyProgram* prog = new courgette::AssemblyProgram();
+    courgette::AssemblyProgram* prog =
+      new courgette::AssemblyProgram(courgette::EXE_WIN_32_X86);
     prog->set_image_base(0x00400000);
 
     courgette::Label* labelA = prog->FindOrMakeAbs32Label(0x00410000);
diff --git a/courgette/assembly_program.cc b/courgette/assembly_program.cc
index 6f137210..64830c2 100644
--- a/courgette/assembly_program.cc
+++ b/courgette/assembly_program.cc
@@ -94,14 +94,33 @@ class InstructionWithLabel : public Instruction {
     if (label == NULL) NOTREACHED();
   }
   Label* label() const { return label_; }
- private:
+ protected:
   Label* label_;
 };
 
+// An ARM REL32 instruction: a reference to a label's address paired with
+// the compressed form of the ARM/thumb branch op that carries it.
+class InstructionWithLabelARM : public InstructionWithLabel {
+ public:
+  InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
+                          const uint8* arm_op, uint16 op_size)
+    : InstructionWithLabel(op, label), compressed_op_(compressed_op),
+      arm_op_(arm_op), op_size_(op_size) {
+    if (label == NULL) NOTREACHED();  // A label is mandatory.
+  }
+  uint16 compressed_op() const { return compressed_op_; }
+  const uint8* arm_op() const { return arm_op_; }
+  uint16 op_size() const { return op_size_; }
+ private:
+  uint16 compressed_op_;  // Op as passed to AssemblyProgram::EmitRel32ARM().
+  const uint8* arm_op_;   // Caller-supplied op bytes; not freed here.
+  uint16 op_size_;        // Caller-supplied op size (presumably in bytes).
+};
+
 }  // namespace
 
-AssemblyProgram::AssemblyProgram()
-  : image_base_(0) {
+AssemblyProgram::AssemblyProgram(ExecutableType kind)
+  : kind_(kind), image_base_(0) {
 }
 
 static void DeleteContainedLabels(const RVAToLabel& labels) {
@@ -147,6 +166,12 @@ CheckBool AssemblyProgram::EmitRel32(Label* label) {
   return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
 }
 
+CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
+                                        const uint8* arm_op, uint16 op_size) {
+  return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
+                                                        arm_op, op_size));
+}
+
 CheckBool AssemblyProgram::EmitAbs32(Label* label) {
   return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
 }
@@ -183,8 +208,11 @@ Label* AssemblyProgram::InstructionAbs32Label(
 
 Label* AssemblyProgram::InstructionRel32Label(
     const Instruction* instruction) const {
-  if (instruction->op() == REL32)
-    return static_cast<const InstructionWithLabel*>(instruction)->label();
+  if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
+    Label* label =
+        static_cast<const InstructionWithLabel*>(instruction)->label();
+    return label;
+  }
   return NULL;
 }
 
@@ -202,6 +230,7 @@ Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
   if (slot == NULL) {
     slot = new(std::nothrow) Label(rva);
   }
+  if (slot != NULL) slot->count_++;  // nothrow new above may have failed.
   return slot;
 }
 
@@ -374,6 +403,16 @@ EncodedProgram* AssemblyProgram::Encode() const {
           return NULL;
         break;
       }
+      case REL32ARM: {
+        Label* label =
+            static_cast<InstructionWithLabelARM*>(instruction)->label();
+        uint16 compressed_op =
+          static_cast<InstructionWithLabelARM*>(instruction)->
+          compressed_op();
+        if (!encoded->AddRel32ARM(compressed_op, label->index_))
+          return NULL;
+        break;
+      }
       case ABS32: {
         Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
         if (!encoded->AddAbs32(label->index_))
@@ -425,6 +464,19 @@ Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
   return byte_instruction_cache_[byte];
 }
 
+void AssemblyProgram::PrintLabelCounts(RVAToLabel* labels) {
+  for (RVAToLabel::const_iterator p = labels->begin(); p != labels->end();
+       ++p) {
+    Label* current = p->second;
+    if (current->index_ != Label::kNoIndex)
+      printf("%d\n", current->count_);
+  }
+}
+
+void AssemblyProgram::CountRel32ARM() {
+  PrintLabelCounts(&rel32_labels_);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 Status Encode(AssemblyProgram* program, EncodedProgram** output) {
diff --git a/courgette/assembly_program.h b/courgette/assembly_program.h
index bb2d34c..af4ecfb 100644
--- a/courgette/assembly_program.h
+++ b/courgette/assembly_program.h
@@ -31,11 +31,12 @@ typedef NoThrowBuffer<Instruction*> InstructionVector;
 class Label {
  public:
   static const int kNoIndex = -1;
-  Label() : rva_(0), index_(kNoIndex) {}
-  explicit Label(RVA rva) : rva_(rva), index_(kNoIndex) {}
+  Label() : rva_(0), index_(kNoIndex), count_(0) {}
+  explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}
 
   RVA rva_;    // Address referred to by the label.
   int index_;  // Index of address in address table, kNoIndex until assigned.
+  int count_;
 };
 
 typedef std::map<RVA, Label*> RVAToLabel;
@@ -61,9 +62,11 @@ typedef std::map<RVA, Label*> RVAToLabel;
 //
 class AssemblyProgram {
  public:
-  AssemblyProgram();
+  explicit AssemblyProgram(ExecutableType kind);
   ~AssemblyProgram();
 
+  ExecutableType kind() const { return kind_; }
+
   void set_image_base(uint64 image_base) { image_base_ = image_base; }
 
   // Instructions will be assembled in the order they are emitted.
@@ -86,6 +89,11 @@ class AssemblyProgram {
   // Generates 4-byte relative reference to address of 'label'.
   CheckBool EmitRel32(Label* label) WARN_UNUSED_RESULT;
 
+  // Generates 4-byte relative reference to address of 'label' for
+  // ARM.
+  CheckBool EmitRel32ARM(uint16 op, Label* label, const uint8* arm_op,
+                         uint16 op_size) WARN_UNUSED_RESULT;
+
   // Generates 4-byte absolute reference to address of 'label'.
   CheckBool EmitAbs32(Label* label) WARN_UNUSED_RESULT;
 
@@ -114,7 +122,12 @@ class AssemblyProgram {
   // otherwise returns NULL.
   Label* InstructionRel32Label(const Instruction* instruction) const;
 
+  void PrintLabelCounts(RVAToLabel* labels);
+  void CountRel32ARM();
+
  private:
+  ExecutableType kind_;
+
   CheckBool Emit(Instruction* instruction) WARN_UNUSED_RESULT;
 
   // Looks up a label or creates a new one.  Might return NULL.
diff --git a/courgette/disassembler.cc b/courgette/disassembler.cc
index 48227b3..798a367 100644
--- a/courgette/disassembler.cc
+++ b/courgette/disassembler.cc
@@ -79,7 +79,7 @@ Status ParseDetectedExecutable(const void* buffer, size_t length,
     return C_INPUT_NOT_RECOGNIZED;
   }
 
-  AssemblyProgram* program = new AssemblyProgram();
+  AssemblyProgram* program = new AssemblyProgram(disassembler->kind());
 
   if (!disassembler->Disassemble(program)) {
     delete program;
diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc
index 1033fd2..fc4c379 100644
--- a/courgette/disassembler_elf_32.cc
+++ b/courgette/disassembler_elf_32.cc
@@ -389,9 +389,9 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(
       RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
                              relative_target);
 
-      if (!program->EmitRel32(program->FindOrMakeRel32Label(target_rva)))
+      if (! (**current_rel)->EmitInstruction(program, target_rva))
         return false;
-      file_offset += sizeof(RVA);
+      file_offset += (**current_rel)->op_size();
       (*current_rel)++;
       continue;
     }
diff --git a/courgette/disassembler_elf_32.h b/courgette/disassembler_elf_32.h
index b3f6e59..dc44ec5 100644
--- a/courgette/disassembler_elf_32.h
+++ b/courgette/disassembler_elf_32.h
@@ -7,6 +7,7 @@
 
 #include "base/basictypes.h"
 #include "base/memory/scoped_vector.h"
+#include "courgette/assembly_program.h"
 #include "courgette/disassembler.h"
 #include "courgette/memory_allocator.h"
 #include "courgette/types_elf.h"
@@ -54,8 +55,15 @@ class DisassemblerElf32 : public Disassembler {
       offset_ = offset;
     }
 
+    // Computes the relative jump's offset from the op at op_pointer.
     virtual CheckBool ComputeRelativeTarget(const uint8* op_pointer) = 0;
 
+    // Emits the courgette instruction corresponding to the RVA type.
+    virtual CheckBool EmitInstruction(AssemblyProgram* program,
+                                      RVA target_rva) = 0;
+
+    virtual uint16 op_size() const = 0;
+
     static bool IsLessThan(TypedRVA *a, TypedRVA *b) {
       return a->rva() < b->rva();
     }
diff --git a/courgette/disassembler_elf_32_arm.cc b/courgette/disassembler_elf_32_arm.cc
index 6270c64..d367716 100644
--- a/courgette/disassembler_elf_32_arm.cc
+++ b/courgette/disassembler_elf_32_arm.cc
@@ -17,45 +17,253 @@
 
 namespace courgette {
 
-CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
-    const uint8* op_pointer) {
-  uint32 temp = 0;
-
-  switch (type_) {
-    case ARM_OFF24:
-      // The offset is given by the lower 24-bits of the op, shifted
-      // left 2 bits, and sign extended.
-      temp = Read32LittleEndian(op_pointer);
-      temp = (temp & 0x00FFFFFF) << 2;
-      if (temp & 0x02000000)
-        temp |= 0xFC000000;
-      temp += 8;
-      break;
-    case ARM_OFF8:
+CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, uint32 arm_op, RVA rva,
+                                         uint16* c_op, uint32* addr) {
+  // This method takes an ARM or thumb opcode, extracts the relative
+  // target address from it (addr), and creates a corresponding
+  // Courgette opcode (c_op).
+  //
+  // Details on the ARM opcodes, and how the relative targets are
+  // computed were taken from the "ARM Architecture Reference Manual",
+  // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
+  // ARM_OFF24 is for the ARM opcode.  The rest are for thumb opcodes.
+  switch (type) {
+    case ARM_OFF8: {
       // The offset is given by lower 8 bits of the op.  It is a 9-bit
       // offset, shifted right one bit and signed extended.
-      temp = (Read16LittleEndian(op_pointer) & 0x00FF) << 1;
+      uint32 temp = (arm_op & 0x00FF) << 1;
       if (temp & 0x0100)
         temp |= 0xFFFFFE00;
       temp += 4;  // Offset from _next_ PC.
+
+      (*addr) = temp;
+      // Keep the op bits above the 8-bit offset; 0x1000 tags the ARM_OFF8 form.
+      (*c_op) = (arm_op >> 8) | 0x1000;
       break;
-    case ARM_OFF11:
+    }
+    case ARM_OFF11: {
       // The offset is given by lower 11 bits of the op, and is a
       // 12-bit offset, shifted right one bit and sign extended.
-      temp = (Read16LittleEndian(op_pointer) & 0x07FF) << 1;
+      uint32 temp = (arm_op & 0x07FF) << 1;
       if (temp & 0x00000800)
         temp |= 0xFFFFF000;
       temp += 4;  // Offset from _next_ PC.
+
+      (*addr) = temp;
+      (*c_op) = (arm_op >> 11) | 0x2000;
       break;
+    }
+    case ARM_OFF24: {
+      // The offset is given by the lower 24-bits of the op, shifted
+      // left 2 bits, and sign extended.
+      uint32 temp = (arm_op & 0x00FFFFFF) << 2;
+      if (temp & 0x02000000)
+        temp |= 0xFC000000;
+      temp += 8;
+
+      (*addr) = temp;
+      (*c_op) = (arm_op >> 24) | 0x3000;
+      break;
+    }
+    case ARM_OFF25: {
+      uint32 temp = 0;
+      temp |= (arm_op & 0x000007FF) << 1;  // imm11
+      temp |= (arm_op & 0x03FF0000) >> 4;  // imm10
+
+      uint32 S   = (arm_op & (1 << 26)) >> 26;
+      uint32 j2  = (arm_op & (1 << 11)) >> 11;
+      uint32 j1  = (arm_op & (1 << 13)) >> 13;
+      bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0;
+      bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0;
+
+      uint32 i2  = ~(j2 ^ S) & 1;
+      uint32 i1  = ~(j1 ^ S) & 1;
+      bool toARM =  bit14 && !bit12;
+
+      temp |= (S << 24) | (i1 << 23) | (i2 << 22);
+
+      if (temp & 0x01000000) // sign extension
+        temp |= 0xFE000000;
+      uint32 prefetch;
+      if (toARM) {
+        // Align PC on 4-byte boundary
+        uint32 align4byte = (rva % 4) ? 2 : 4;
+        prefetch = align4byte;
+      } else {
+        prefetch = 4;
+      }
+      temp += prefetch;
+      (*addr) = temp;
+
+      uint32 temp2 = 0x4000;
+      temp2 |= (arm_op & (1 << 12)) >> 12;
+      temp2 |= (arm_op & (1 << 14)) >> 13;
+      temp2 |= (arm_op & (1 << 15)) >> 13;
+      temp2 |= (arm_op & 0xF8000000) >> 24;
+      temp2 |= (prefetch & 0x0000000F) << 8;
+      (*c_op) = temp2;
+      break;
+    }
+    case ARM_OFF21: {
+      uint32 temp = 0;
+      temp |= (arm_op & 0x000007FF) << 1; // imm11
+      temp |= (arm_op & 0x003F0000) >> 4; // imm6
+
+      uint32 S   = (arm_op & (1 << 26)) >> 26;
+      uint32 j2  = (arm_op & (1 << 11)) >> 11;
+      uint32 j1  = (arm_op & (1 << 13)) >> 13;
+
+      temp |= (S << 20) | (j1 << 19) | (j2 << 18);
+
+      if (temp & 0x00100000) // sign extension
+        temp |= 0xFFE00000;
+      temp += 4;
+      (*addr) = temp;
+
+      uint32 temp2 = 0x5000;
+      temp2 |= (arm_op & 0x03C00000) >> 22;  // just save the cond
+      (*c_op) = temp2;
+      break;
+    }
     default:
       return false;
   }
+  return true;
+}
+
+CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, uint16 c_op,
+                                           uint32 addr, uint32* arm_op) {
+  // Reverses the process in the Compress() method.  Takes the
+  // Courgette op and relative address and reconstructs the original
+  // ARM or thumb op.
+  switch (type) {
+    case ARM_OFF8:
+      (*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF);
+      break;
+    case ARM_OFF11:
+      (*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF);
+      break;
+    case ARM_OFF24:
+      (*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF);
+      break;
+    case ARM_OFF25: {
+      uint32 temp = 0;
+      temp |= (c_op & (1 << 0)) << 12;
+      temp |= (c_op & (1 << 1)) << 13;
+      temp |= (c_op & (1 << 2)) << 13;
+      temp |= (c_op & (0xF8000000 >> 24)) << 24;
+
+      uint32 prefetch = (c_op & 0x0F00) >> 8;
+      addr -= prefetch;
+
+      addr &= 0x01FFFFFF;
 
-  set_relative_target(temp);
+      uint32 S  = (addr & (1 << 24)) >> 24;
+      uint32 i1 = (addr & (1 << 23)) >> 23;
+      uint32 i2 = (addr & (1 << 22)) >> 22;
 
+      uint32 j1 = ((~i1) ^ S) & 1;
+      uint32 j2 = ((~i2) ^ S) & 1;
+
+      temp |= S << 26;
+      temp |= j2 << 11;
+      temp |= j1 << 13;
+
+      temp |= (addr & (0x000007FF << 1)) >> 1;
+      temp |= (addr & (0x03FF0000 >> 4)) << 4;
+
+      (*arm_op) = temp;
+      break;
+    }
+    case ARM_OFF21: {
+      uint32 temp = 0xF0008000;
+      temp |= (c_op & (0x03C00000 >> 22)) << 22;
+
+      addr -= 4;
+      addr &= 0x001FFFFF;
+
+      uint32 S  = (addr & (1 << 20)) >> 20;
+      uint32 j1 = (addr & (1 << 19)) >> 19;
+      uint32 j2 = (addr & (1 << 18)) >> 18;
+
+      temp |= S << 26;
+      temp |= j2 << 11;
+      temp |= j1 << 13;
+
+      temp |= (addr & (0x000007FF << 1)) >> 1;
+      temp |= (addr & (0x003F0000 >> 4)) << 4;
+
+      (*arm_op) = temp;
+      break;
+    }
+    default:
+      return false;
+  }
   return true;
 }
 
+uint16 DisassemblerElf32ARM::TypedRVAARM::op_size() const {
+  switch (type_) {
+    case ARM_OFF8:
+      return 2;  // 16-bit thumb op.
+    case ARM_OFF11:
+      return 2;  // 16-bit thumb op.
+    case ARM_OFF24:
+      return 4;  // 32-bit ARM op.
+    case ARM_OFF25:
+      return 4;  // thumb-2 op: two 16-bit words.
+    case ARM_OFF21:
+      return 4;  // thumb-2 op: two 16-bit words.
+    default:
+      return -1;  // NOTE(review): becomes 0xFFFF as uint16; confirm intent.
+  }
+}
+
+CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
+    const uint8* op_pointer) {
+  arm_op_ = op_pointer;
+  switch (type_) {
+    case ARM_OFF8:
+      // Fall through
+    case ARM_OFF11: {
+      RVA relative_target;
+      CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(),
+                               &c_op_, &relative_target);
+      set_relative_target(relative_target);
+      return ret;
+    }
+    case ARM_OFF24: {
+      RVA relative_target;
+      CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(),
+                               &c_op_, &relative_target);
+      set_relative_target(relative_target);
+      return ret;
+    }
+    case ARM_OFF25:
+      // Fall through
+    case ARM_OFF21: {
+      // A thumb-2 op is 32 bits stored as two 16-bit words
+      uint32 pval = (Read16LittleEndian(op_pointer) << 16)
+        | Read16LittleEndian(op_pointer + 2);
+      RVA relative_target;
+      CheckBool ret = Compress(type_, pval, rva(), &c_op_, &relative_target);
+      set_relative_target(relative_target);
+      return ret;
+    }
+   default:
+     return false;
+  }
+}
+
+CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
+    AssemblyProgram* program,
+    RVA target_rva) {
+  return program->EmitRel32ARM(c_op(),
+                               program->FindOrMakeRel32Label(target_rva),
+                               arm_op_,
+                               op_size());
+}
+
 DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length)
   : DisassemblerElf32(start, length) {
 }
@@ -158,7 +366,124 @@ CheckBool DisassemblerElf32ARM::ParseRelocationSection(
 
 CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection(
     const Elf32_Shdr* section_header) {
-  // TODO(paulgazz) find relative jumps in ARM assembly
+
+  uint32 start_file_offset = section_header->sh_offset;
+  uint32 end_file_offset = start_file_offset + section_header->sh_size;
+
+  const uint8* start_pointer = OffsetToPointer(start_file_offset);
+  const uint8* end_pointer = OffsetToPointer(end_file_offset);
+
+  // Quick way to convert from Pointer to RVA within a single Section is to
+  // subtract 'pointer_to_rva'.
+  const uint8* const adjust_pointer_to_rva = start_pointer -
+                                             section_header->sh_addr;
+
+  // Find the rel32 relocations.
+  const uint8* p = start_pointer;
+  bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
+  while (p < end_pointer) {
+    // Heuristic discovery of rel32 locations in instruction stream: are the
+    // next few bytes the start of an instruction containing a rel32
+    // addressing mode?
+
+    TypedRVAARM* rel32_rva = NULL;
+    RVA target_rva;
+    bool found = false;
+
+    // 16-bit thumb ops
+    if (!found && (p + 3) <= end_pointer) {
+      uint16 pval = Read16LittleEndian(p);
+      if ((pval & 0xF000) == 0xD000) {
+        RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
+        rel32_rva = new TypedRVAARM(ARM_OFF8, rva);
+        if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
+          return false;
+        }
+        target_rva = rel32_rva->rva() + rel32_rva->relative_target();
+        found = true;
+      } else if ((pval & 0xF800) == 0xE000) {
+        RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
+        rel32_rva = new TypedRVAARM(ARM_OFF11, rva);
+        if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
+          return false;
+        }
+        target_rva = rel32_rva->rva() + rel32_rva->relative_target();
+        found = true;
+      }
+    }
+
+    // thumb-2 ops comprised of two 16-bit words
+    if (!found && (p + 5) <= end_pointer) {
+      // This is really two 16-bit words, not one 32-bit word.
+      uint32 pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2);
+      if ((pval & 0xF8008000) == 0xF0008000) {
+        // Covers thumb-2's 32-bit conditional/unconditional branches
+
+        if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) {
+          // A branch, with link, or with link and exchange.
+          RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
+          rel32_rva = new TypedRVAARM(ARM_OFF25, rva);
+          if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
+            return false;
+          }
+          target_rva = rel32_rva->rva() + rel32_rva->relative_target();
+          found = true;
+        } else {
+          // TODO(paulgazz) make sure cond is not 111
+          // A conditional branch instruction
+          RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
+          rel32_rva = new TypedRVAARM(ARM_OFF21, rva);
+          if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
+            return false;
+          }
+          target_rva = rel32_rva->rva() + rel32_rva->relative_target();
+          found = true;
+        }
+      }
+    }
+
+    // 32-bit ARM ops
+    if (!found && on_32bit && (p + 5) <= end_pointer) {
+      uint32 pval = Read32LittleEndian(p);
+      if ((pval & 0x0E000000) == 0x0A000000) {
+        // Covers both 0x0A 0x0B ARM relative branches
+        RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
+
+        rel32_rva = new TypedRVAARM(ARM_OFF24, rva);
+        if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
+          return false;
+        }
+        target_rva = rel32_rva->rva() + rel32_rva->relative_target();
+        found = true;
+      }
+    }
+
+    if (found && IsValidRVA(target_rva)) {
+      rel32_locations_.push_back(rel32_rva);
+#if COURGETTE_HISTOGRAM_TARGETS
+      ++rel32_target_rvas_[target_rva];
+#endif
+      p += rel32_rva->op_size();
+
+      // The flag stays set iff it agrees with "size is 4" (logical XNOR):
+      // on_32bit | on_32bit   size is 4
+      // ---------+---------------------
+      // 1        | 0          0
+      // 0        | 0          1
+      // 0        | 1          0
+      // 1        | 1          1
+      on_32bit = (on_32bit == (rel32_rva->op_size() == 4));
+    } else {
+      delete rel32_rva;  // Rejected candidate (or NULL); it was never stored.
+      p += 2;  // Move 2 bytes at a time, but track 32-bit boundaries.
+      on_32bit = !on_32bit;
+    }
+  }
+
   return true;
 }
 
diff --git a/courgette/disassembler_elf_32_arm.h b/courgette/disassembler_elf_32_arm.h
index 57e750e..08b8db8 100644
--- a/courgette/disassembler_elf_32_arm.h
+++ b/courgette/disassembler_elf_32_arm.h
@@ -18,6 +18,8 @@ enum ARM_RVA {
   ARM_OFF8,
   ARM_OFF11,
   ARM_OFF24,
+  ARM_OFF25,
+  ARM_OFF21,
 };
 
 class DisassemblerElf32ARM : public DisassemblerElf32 {
@@ -26,10 +28,22 @@ class DisassemblerElf32ARM : public DisassemblerElf32 {
    public:
     TypedRVAARM(ARM_RVA type, RVA rva) : TypedRVA(rva), type_(type) { }
 
-    virtual CheckBool ComputeRelativeTarget(const uint8* op_pointer) OVERRIDE;
+    uint16 c_op() const {
+      return c_op_;
+    }
+
+    virtual CheckBool ComputeRelativeTarget(const uint8* op_pointer) OVERRIDE;
+
+    virtual CheckBool EmitInstruction(AssemblyProgram* program,
+                                      RVA target_rva) OVERRIDE;
+
+    virtual uint16 op_size() const OVERRIDE;
 
    private:
     ARM_RVA type_;
+
+    uint16 c_op_;  // set by ComputeRelativeTarget()
+    const uint8* arm_op_;
   };
 
   explicit DisassemblerElf32ARM(const void* start, size_t length);
@@ -38,6 +52,12 @@ class DisassemblerElf32ARM : public DisassemblerElf32 {
 
   virtual e_machine_values ElfEM() { return EM_ARM; }
 
+  static CheckBool Compress(ARM_RVA type, uint32 arm_op, RVA rva,
+                            uint16* c_op /* out */, uint32* addr /* out */);
+
+  static CheckBool Decompress(ARM_RVA type, uint16 c_op, uint32 addr,
+                              uint32* arm_op /* out */);
+
  protected:
 
   virtual CheckBool RelToRVA(Elf32_Rel rel, RVA* result)
diff --git a/courgette/disassembler_elf_32_x86.h b/courgette/disassembler_elf_32_x86.h
index 5e7cdff..72d7e31 100644
--- a/courgette/disassembler_elf_32_x86.h
+++ b/courgette/disassembler_elf_32_x86.h
@@ -25,6 +25,13 @@ class DisassemblerElf32X86 : public DisassemblerElf32 {
       set_relative_target(Read32LittleEndian(op_pointer) + 4);
       return true;
     }
+
+    virtual CheckBool EmitInstruction(AssemblyProgram* program,
+                                       RVA target_rva) OVERRIDE {
+      return program->EmitRel32(program->FindOrMakeRel32Label(target_rva));
+    }
+
+    virtual uint16 op_size() const OVERRIDE { return 4; }
   };
 
   explicit DisassemblerElf32X86(const void* start, size_t length);
diff --git a/courgette/disassembler_elf_32_x86_unittest.cc b/courgette/disassembler_elf_32_x86_unittest.cc
index 297ffcc..d48bc4f 100644
--- a/courgette/disassembler_elf_32_x86_unittest.cc
+++ b/courgette/disassembler_elf_32_x86_unittest.cc
@@ -38,7 +38,8 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
   EXPECT_EQ('L', offset_p[2]);
   EXPECT_EQ('F', offset_p[3]);
 
-  courgette::AssemblyProgram* program = new courgette::AssemblyProgram();
+  courgette::AssemblyProgram* program =
+    new courgette::AssemblyProgram(courgette::EXE_ELF_32_X86);
 
   EXPECT_TRUE(disassembler->Disassemble(program));
 
diff --git a/courgette/encoded_program.cc b/courgette/encoded_program.cc
index b7c9b55..c619c6a 100644
--- a/courgette/encoded_program.cc
+++ b/courgette/encoded_program.cc
@@ -243,6 +243,11 @@ CheckBool EncodedProgram::AddRel32(int label_index) {
   return ops_.push_back(REL32) && rel32_ix_.push_back(label_index);
 }
 
+CheckBool EncodedProgram::AddRel32ARM(uint16 op, int label_index) {
+  return ops_.push_back(static_cast<OP>(op)) &&
+      rel32_ix_.push_back(label_index);
+}
+
 CheckBool EncodedProgram::AddPeMakeRelocs() {
   return ops_.push_back(MAKE_PE_RELOCATION_TABLE);
 }
@@ -398,6 +403,119 @@ bool VectorAt(const V& v, size_t index, T* output) {
   return true;
 }
 
+CheckBool EncodedProgram::EvaluateRel32ARM(OP op,
+                                           size_t& ix_rel32_ix,
+                                           RVA& current_rva,
+                                           SinkStream* output) {
+  switch (op & 0x0000F000) {
+    case REL32ARM8: {
+      uint32 index;
+      if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
+        return false;
+      ++ix_rel32_ix;
+      RVA rva;
+      if (!VectorAt(rel32_rva_, index, &rva))
+        return false;
+      uint32 decompressed_op;
+      if (!DisassemblerElf32ARM::Decompress(ARM_OFF8,
+                                            static_cast<uint16>(op),
+                                            static_cast<uint32>(rva -
+                                                                current_rva),
+                                            &decompressed_op)) {
+        return false;
+      }
+      uint16 op16 = decompressed_op;
+      if (!output->Write(&op16, 2))
+        return false;
+      current_rva += 2;
+      break;
+    }
+    case REL32ARM11: {
+      uint32 index;
+      if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
+        return false;
+      ++ix_rel32_ix;
+      RVA rva;
+      if (!VectorAt(rel32_rva_, index, &rva))
+        return false;
+      uint32 decompressed_op;
+      if (!DisassemblerElf32ARM::Decompress(ARM_OFF11, (uint16) op,
+                                            (uint32) (rva - current_rva),
+                                            &decompressed_op)) {
+        return false;
+      }
+      uint16 op16 = decompressed_op;
+      if (!output->Write(&op16, 2))
+        return false;
+      current_rva += 2;
+      break;
+    }
+    case REL32ARM24: {
+      uint32 index;
+      if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
+        return false;
+      ++ix_rel32_ix;
+      RVA rva;
+      if (!VectorAt(rel32_rva_, index, &rva))
+        return false;
+      uint32 decompressed_op;
+      if (!DisassemblerElf32ARM::Decompress(ARM_OFF24, (uint16) op,
+                                            (uint32) (rva - current_rva),
+                                            &decompressed_op)) {
+        return false;
+      }
+      if (!output->Write(&decompressed_op, 4))
+        return false;
+      current_rva += 4;
+      break;
+    }
+    case REL32ARM25: {
+      uint32 index;
+      if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
+        return false;
+      ++ix_rel32_ix;
+      RVA rva;
+      if (!VectorAt(rel32_rva_, index, &rva))
+        return false;
+      uint32 decompressed_op;
+      if (!DisassemblerElf32ARM::Decompress(ARM_OFF25, (uint16) op,
+                                            (uint32) (rva - current_rva),
+                                            &decompressed_op)) {
+        return false;
+      }
+      uint32 words = (decompressed_op << 16) | (decompressed_op >> 16);
+      if (!output->Write(&words, 4))
+        return false;
+      current_rva += 4;
+      break;
+    }
+    case REL32ARM21: {
+      uint32 index;
+      if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
+        return false;
+      ++ix_rel32_ix;
+      RVA rva;
+      if (!VectorAt(rel32_rva_, index, &rva))
+        return false;
+      uint32 decompressed_op;
+      if (!DisassemblerElf32ARM::Decompress(ARM_OFF21, (uint16) op,
+                                            (uint32) (rva - current_rva),
+                                            &decompressed_op)) {
+        return false;
+      }
+      uint32 words = (decompressed_op << 16) | (decompressed_op >> 16);
+      if (!output->Write(&words, 4))
+        return false;
+      current_rva += 4;
+      break;
+    }
+    default:
+      return false;
+  }
+
+  return true;
+}
+
 CheckBool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
   // For the most part, the assembly process walks the various tables.
   // ix_mumble is the index into the mumble table.
@@ -420,7 +538,9 @@ CheckBool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
 
     switch (op) {
       default:
-        return false;
+        if (!EvaluateRel32ARM(op, ix_rel32_ix, current_rva, output))
+          return false;
+        break;
 
       case ORIGIN: {
         RVA section_rva;
diff --git a/courgette/encoded_program.h b/courgette/encoded_program.h
index 0de4f6b..3eca364 100644
--- a/courgette/encoded_program.h
+++ b/courgette/encoded_program.h
@@ -43,6 +43,7 @@ class EncodedProgram {
   CheckBool AddOrigin(RVA rva) WARN_UNUSED_RESULT;
   CheckBool AddCopy(uint32 count, const void* bytes) WARN_UNUSED_RESULT;
   CheckBool AddRel32(int label_index) WARN_UNUSED_RESULT;
+  CheckBool AddRel32ARM(uint16 op, int label_index) WARN_UNUSED_RESULT;
   CheckBool AddAbs32(int label_index) WARN_UNUSED_RESULT;
   CheckBool AddPeMakeRelocs() WARN_UNUSED_RESULT;
   CheckBool AddElfMakeRelocs() WARN_UNUSED_RESULT;
@@ -74,6 +75,14 @@ class EncodedProgram {
     MAKE_PE_RELOCATION_TABLE = 5,  // Emit PE base relocation table blocks.
     MAKE_ELF_RELOCATION_TABLE = 6, // Emit Elf relocation table for X86
     MAKE_ELF_ARM_RELOCATION_TABLE = 7, // Emit Elf relocation table for ARM
+    // ARM reserves 0x1000-LAST_ARM, bits 13-16 define the opcode
+    // subset, and 1-12 are the compressed ARM op.
+    REL32ARM8   = 0x1000,
+    REL32ARM11  = 0x2000,
+    REL32ARM24  = 0x3000,
+    REL32ARM25  = 0x4000,
+    REL32ARM21  = 0x5000,
+    LAST_ARM    = 0x5FFF,
   };
 
   typedef NoThrowBuffer<RVA> RvaVector;
@@ -88,6 +97,10 @@ class EncodedProgram {
   CheckBool DefineLabelCommon(RvaVector*, int, RVA) WARN_UNUSED_RESULT;
   void FinishLabelsCommon(RvaVector* addresses);
 
+  // Decodes and evaluates courgette ops for ARM rel32 addresses.
+  CheckBool EvaluateRel32ARM(OP op, size_t& ix_rel32_ix, RVA& current_rva,
+                             SinkStream* output);
+
   // Binary assembly language tables.
   uint64 image_base_;
   RvaVector rel32_rva_;
diff --git a/courgette/patch_generator_x86_32.h b/courgette/patch_generator_x86_32.h
index 5ac017b..f68185e 100644
--- a/courgette/patch_generator_x86_32.h
+++ b/courgette/patch_generator_x86_32.h
@@ -10,6 +10,7 @@
 #include "base/logging.h"
 #include "base/memory/scoped_ptr.h"
 
+#include "courgette/assembly_program.h"
 #include "courgette/ensemble.h"
 
 namespace courgette {
diff --git a/courgette/typedrva_unittest.cc b/courgette/typedrva_unittest.cc
index ea38566..1fa185f 100644
--- a/courgette/typedrva_unittest.cc
+++ b/courgette/typedrva_unittest.cc
@@ -15,6 +15,11 @@ class TypedRVATest : public BaseTest {
                              courgette::RVA rva,
                              uint32 op,
                              courgette::RVA expected) const;
+
+  void TestARMOPEncode(courgette::ARM_RVA arm_rva,
+                       courgette::RVA rva,
+                       uint32 op,
+                       courgette::RVA expected) const;
 };
 
 void TypedRVATest::TestRelativeTargetX86(courgette::RVA word,
@@ -38,7 +43,7 @@ void TypedRVATest::TestRelativeTargetARM(courgette::ARM_RVA arm_rva,
                                          uint32 op,
                                          courgette::RVA expected) const {
   courgette::DisassemblerElf32ARM::TypedRVAARM* typed_rva
-    = new courgette::DisassemblerElf32ARM::TypedRVAARM(arm_rva, 0);
+    = new courgette::DisassemblerElf32ARM::TypedRVAARM(arm_rva, rva);
   uint8* op_pointer = reinterpret_cast<uint8*>(&op);
 
   EXPECT_TRUE(typed_rva->ComputeRelativeTarget(op_pointer));
@@ -47,6 +52,22 @@ void TypedRVATest::TestRelativeTargetARM(courgette::ARM_RVA arm_rva,
   delete typed_rva;
 }
 
+void TypedRVATest::TestARMOPEncode(courgette::ARM_RVA arm_rva,
+                                   courgette::RVA rva,
+                                   uint32 op,
+                                   courgette::RVA expected) const {
+  uint16 c_op = 0;  // Compressed op; filled in by Compress().
+  uint32 addr = 0;  // Filled in by Compress(); added to |rva| below.
+  ASSERT_TRUE(courgette::DisassemblerElf32ARM::Compress(arm_rva, op, rva,
+                                                        &c_op, &addr));
+  EXPECT_EQ(rva + addr, expected);
+  // Decompress() must reproduce the original |op| from (c_op, addr).
+  uint32 new_op = 0;  // Zeroed so a failure never reads garbage.
+  ASSERT_TRUE(courgette::DisassemblerElf32ARM::Decompress(arm_rva, c_op, addr,
+                                                          &new_op));
+  EXPECT_EQ(new_op, op);
+}
+
 TEST_F(TypedRVATest, TestX86) {
   TestRelativeTargetX86(0x0, 0x4);
 }
@@ -91,6 +112,52 @@ TEST_F(TypedRVATest, TestARM_OFF24_FORWARDS) {
 }
 
 TEST_F(TypedRVATest, TestARM_OFF24_BACKWARDS) {
-  // TODO(paulgazz): find a real-world example of an ARM branch op
-  // that jumps backwards.
+  // TODO(paulgazz): find a real-world example of a non-thumb ARM
+  // branch op that jumps backwards.
+}
+
+TEST_F(TypedRVATest, TestARM_OFF25_FORWARDS) {  // expected > rva in each case.
+  TestRelativeTargetARM(courgette::ARM_OFF25, 0x2bf4, 0xfe06f008, 0xb804);
+  TestRelativeTargetARM(courgette::ARM_OFF25, 0x2c58, 0xfeacf005, 0x89b4);
+}
+
+TEST_F(TypedRVATest, TestARM_OFF25_BACKWARDS) {  // expected < rva in each case.
+  TestRelativeTargetARM(courgette::ARM_OFF25, 0x2bd2, 0xeb9ef7ff, 0x2310);
+  TestRelativeTargetARM(courgette::ARM_OFF25, 0x2bd8, 0xeb8ef7ff, 0x22f8);
+  TestRelativeTargetARM(courgette::ARM_OFF25, 0x2c3e, 0xea2ef7ff, 0x209c);
+}
+
+TEST_F(TypedRVATest, TestARM_OFF21_FORWARDS) {  // expected > rva in each case.
+  TestRelativeTargetARM(courgette::ARM_OFF21, 0x2bc6, 0x84c7f000, 0x3558);
+  TestRelativeTargetARM(courgette::ARM_OFF21, 0x2bde, 0x871df000, 0x3a1c);
+  TestRelativeTargetARM(courgette::ARM_OFF21, 0x2c5e, 0x86c1f2c0, 0x39e4);
+}
+
+TEST_F(TypedRVATest, TestARM_OFF21_BACKWARDS) {  // expected < rva in each case.
+  TestRelativeTargetARM(courgette::ARM_OFF21, 0x67e4, 0xaee9f43f, 0x65ba);
+  TestRelativeTargetARM(courgette::ARM_OFF21, 0x67ee, 0xaee4f47f, 0x65ba);
+}
+
+TEST_F(TypedRVATest, TestARMOPEncode) {  // Args: arm_rva, rva, op, expected.
+  TestARMOPEncode(courgette::ARM_OFF8, 0x2bcc, 0xd00e, 0x2bec);
+  TestARMOPEncode(courgette::ARM_OFF8, 0x3752, 0xd910, 0x3776);
+  TestARMOPEncode(courgette::ARM_OFF8, 0x3774, 0xd1f6, 0x3764);
+  TestARMOPEncode(courgette::ARM_OFF11, 0x0, 0x0, 0x4);
+  TestARMOPEncode(courgette::ARM_OFF11, 0x2bea, 0xe005, 0x2bf8);
+  TestARMOPEncode(courgette::ARM_OFF11, 0x2f80, 0xe6cd, 0x2d1e);
+  TestARMOPEncode(courgette::ARM_OFF11, 0x3610, 0xe56a, 0x30e8);
+  TestARMOPEncode(courgette::ARM_OFF24, 0x0, 0x0, 0x8);
+  TestARMOPEncode(courgette::ARM_OFF24, 0x2384, 0x4af3613a, 0xffcda874);
+  TestARMOPEncode(courgette::ARM_OFF24, 0x23bc, 0x6af961b9, 0xffe5aaa8);
+  TestARMOPEncode(courgette::ARM_OFF24, 0x23d4, 0x2b006823, 0x1c468);
+  TestARMOPEncode(courgette::ARM_OFF25, 0x2bf4, 0xf008fe06, 0xb804);
+  TestARMOPEncode(courgette::ARM_OFF25, 0x2c58, 0xf005feac, 0x89b4);
+  TestARMOPEncode(courgette::ARM_OFF25, 0x2bd2, 0xf7ffeb9e, 0x2310);
+  TestARMOPEncode(courgette::ARM_OFF25, 0x2bd8, 0xf7ffeb8e, 0x22f8);
+  TestARMOPEncode(courgette::ARM_OFF25, 0x2c3e, 0xf7ffea2e, 0x209c);
+  TestARMOPEncode(courgette::ARM_OFF21, 0x2bc6, 0xf00084c7, 0x3558);
+  TestARMOPEncode(courgette::ARM_OFF21, 0x2bde, 0xf000871d, 0x3a1c);
+  TestARMOPEncode(courgette::ARM_OFF21, 0x2c5e, 0xf2c086c1, 0x39e4);
+  TestARMOPEncode(courgette::ARM_OFF21, 0x67e4, 0xf43faee9, 0x65ba);
+  TestARMOPEncode(courgette::ARM_OFF21, 0x67ee, 0xf47faee4, 0x65ba);
 }