[Courgette] Clean up Disassembler; fix ELF Memory leaks.

Cleaning up code surrounding Disassembler:
- Extract AddressTranslator interface to be used across subclasses.
- Use FileOffset = size_t by context.
- Detailed comments & TODOs in DisassemblerElf32ARM.
- Fix DisassemblerElf32ARM memory leaks.
- Lots of superficial stylistic changes.

Except for AddressTranslator routines and unit tests, shying away
from control flow and logic changes.

BUG=579206

Committed: https://crrev.com/58b822d441f5c982e879e536fa3c1cbac8fd339a
Cr-Commit-Position: refs/heads/master@{#380881}

Review URL: https://codereview.chromium.org/1676683002

Cr-Commit-Position: refs/heads/master@{#380987}
diff --git a/courgette/disassembler_elf_32.cc b/courgette/disassembler_elf_32.cc
index 84aa971..9ceb8ab 100644
--- a/courgette/disassembler_elf_32.cc
+++ b/courgette/disassembler_elf_32.cc
@@ -4,39 +4,70 @@
 
 #include "courgette/disassembler_elf_32.h"
 
-#include <stddef.h>
-#include <stdint.h>
-
 #include <algorithm>
-#include <string>
-#include <vector>
 
 #include "base/logging.h"
-#include "base/memory/scoped_vector.h"
-
 #include "courgette/assembly_program.h"
 #include "courgette/courgette.h"
-#include "courgette/encoded_program.h"
 
 namespace courgette {
 
 DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
-  : Disassembler(start, length),
-    header_(NULL),
-    section_header_table_(NULL),
-    section_header_table_size_(0),
-    program_header_table_(NULL),
-    program_header_table_size_(0),
-    default_string_section_(NULL) {
+    : Disassembler(start, length),
+      header_(nullptr),
+      section_header_table_(nullptr),
+      section_header_table_size_(0),
+      program_header_table_(nullptr),
+      program_header_table_size_(0),
+      default_string_section_(nullptr) {
+}
+
+RVA DisassemblerElf32::FileOffsetToRVA(FileOffset offset) const {
+  // File offsets can be 64-bit values, but we are dealing with 32-bit
+  // executables and so only need to support 32-bit file sizes.
+  uint32_t offset32 = static_cast<uint32_t>(offset);
+
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
+    // These can appear to have a size in the file, but don't.
+    if (section_header->sh_type == SHT_NOBITS)
+      continue;
+
+    Elf32_Off section_begin = section_header->sh_offset;
+    Elf32_Off section_end = section_begin + section_header->sh_size;
+
+    if (offset32 >= section_begin && offset32 < section_end) {
+      return section_header->sh_addr + (offset32 - section_begin);
+    }
+  }
+
+  return 0;
+}
+
+FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const {
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
+    // These can appear to have a size in the file, but don't.
+    if (section_header->sh_type == SHT_NOBITS)
+      continue;
+    Elf32_Addr begin = section_header->sh_addr;
+    Elf32_Addr end = begin + section_header->sh_size;
+
+    if (rva >= begin && rva < end)
+      return section_header->sh_offset + (rva - begin);
+  }
+  return kNoFileOffset;
 }
 
 bool DisassemblerElf32::ParseHeader() {
   if (length() < sizeof(Elf32_Ehdr))
     return Bad("Too small");
 
-  header_ = (Elf32_Ehdr *)start();
+  header_ = reinterpret_cast<const Elf32_Ehdr*>(start());
 
-  // Have magic for elf header?
+  // Have magic for ELF header?
   if (header_->e_ident[0] != 0x7f ||
       header_->e_ident[1] != 'E' ||
       header_->e_ident[2] != 'L' ||
@@ -59,23 +90,25 @@
   if (!IsArrayInBounds(header_->e_shoff, header_->e_shnum, sizeof(Elf32_Shdr)))
     return Bad("Out of bounds section header table");
 
-  section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
+  section_header_table_ = reinterpret_cast<const Elf32_Shdr*>(
+      FileOffsetToPointer(header_->e_shoff));
   section_header_table_size_ = header_->e_shnum;
 
   if (!IsArrayInBounds(header_->e_phoff, header_->e_phnum, sizeof(Elf32_Phdr)))
     return Bad("Out of bounds program header table");
 
-  program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
+  program_header_table_ = reinterpret_cast<const Elf32_Phdr*>(
+      FileOffsetToPointer(header_->e_phoff));
   program_header_table_size_ = header_->e_phnum;
 
   if (header_->e_shstrndx >= header_->e_shnum)
     return Bad("Out of bounds string section index");
 
-  default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
+  default_string_section_ = reinterpret_cast<const char*>(
+      SectionBody(static_cast<int>(header_->e_shstrndx)));
 
-  if (!UpdateLength()) {
+  if (!UpdateLength())
     return Bad("Out of bounds section or segment");
-  }
 
   return Good();
 }
@@ -97,7 +130,6 @@
     return false;
 
   target->DefaultAssignIndexes();
-
   return true;
 }
 
@@ -105,8 +137,9 @@
   Elf32_Off result = 0;
 
   // Find the end of the last section
-  for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
-    const Elf32_Shdr *section_header = SectionHeader(section_id);
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
 
     if (section_header->sh_type == SHT_NOBITS)
       continue;
@@ -119,8 +152,9 @@
   }
 
   // Find the end of the last segment
-  for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
-    const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
+  for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
+       ++segment_id) {
+    const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
 
     if (!IsArrayInBounds(segment_header->p_offset, segment_header->p_filesz, 1))
       return false;
@@ -129,25 +163,26 @@
     result = std::max(result, segment_end);
   }
 
-  Elf32_Off section_table_end = header_->e_shoff +
-                                (header_->e_shnum * sizeof(Elf32_Shdr));
+  Elf32_Off section_table_end =
+      header_->e_shoff + (header_->e_shnum * sizeof(Elf32_Shdr));
   result = std::max(result, section_table_end);
 
-  Elf32_Off segment_table_end = header_->e_phoff +
-                                (header_->e_phnum * sizeof(Elf32_Phdr));
+  Elf32_Off segment_table_end =
+      header_->e_phoff + (header_->e_phnum * sizeof(Elf32_Phdr));
   result = std::max(result, segment_table_end);
 
   ReduceLength(result);
   return true;
 }
 
-CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
+CheckBool DisassemblerElf32::IsValidTargetRVA(RVA rva) const {
   if (rva == kUnassignedRVA)
     return false;
 
   // It's valid if it's contained in any program segment
-  for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
-    const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
+  for (Elf32_Half segment_id = 0; segment_id < ProgramSegmentHeaderCount();
+       ++segment_id) {
+    const Elf32_Phdr* segment_header = ProgramSegmentHeader(segment_id);
 
     if (segment_header->p_type != PT_LOAD)
       continue;
@@ -162,114 +197,58 @@
   return false;
 }
 
-CheckBool DisassemblerElf32::RVAToFileOffset(RVA rva,
-                                             size_t* result) const {
-  for (int i = 0; i < SectionHeaderCount(); i++) {
-    const Elf32_Shdr *section_header = SectionHeader(i);
-    // These can appear to have a size in the file, but don't.
-    if (section_header->sh_type == SHT_NOBITS)
-      continue;
-    Elf32_Addr begin = section_header->sh_addr;
-    Elf32_Addr end = begin + section_header->sh_size;
-
-    if (rva >= begin && rva < end) {
-      *result = section_header->sh_offset + (rva - begin);
-      return true;
-    }
-  }
-  return false;
-}
-
-RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
-  // File offsets can be 64 bit values, but we are dealing with 32
-  // bit executables and so only need to support 32bit file sizes.
-  uint32_t offset32 = (uint32_t)offset;
-
-  for (int i = 0; i < SectionHeaderCount(); i++) {
-
-    const Elf32_Shdr *section_header = SectionHeader(i);
-
-    // These can appear to have a size in the file, but don't.
-    if (section_header->sh_type == SHT_NOBITS)
-      continue;
-
-    Elf32_Off section_begin = section_header->sh_offset;
-    Elf32_Off section_end = section_begin + section_header->sh_size;
-
-    if (offset32 >= section_begin && offset32 < section_end) {
-      return section_header->sh_addr + (offset32 - section_begin);
-    }
-  }
-
-  return 0;
-}
-
-CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
-                                           std::vector<size_t>* offsets) {
-  offsets->clear();
-
-  for (std::vector<RVA>::iterator rva = rvas->begin();
-       rva != rvas->end();
-       rva++) {
-
-    size_t offset;
-
-    if (!RVAToFileOffset(*rva, &offset))
+CheckBool DisassemblerElf32::RVAsToFileOffsets(
+    const std::vector<RVA>& rvas,
+    std::vector<FileOffset>* file_offsets) {
+  file_offsets->clear();
+  for (RVA rva : rvas) {
+    FileOffset file_offset = RVAToFileOffset(rva);
+    if (file_offset == kNoFileOffset)
       return false;
-
-    offsets->push_back(offset);
+    file_offsets->push_back(file_offset);
   }
-
   return true;
 }
 
-CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
-  for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
-       rva != rvas->end();
-       rva++) {
-
-    size_t offset;
-
-    if (!RVAToFileOffset((*rva)->rva(), &offset))
+CheckBool DisassemblerElf32::RVAsToFileOffsets(
+    ScopedVector<TypedRVA>* typed_rvas) {
+  for (TypedRVA* typed_rva : *typed_rvas) {
+    FileOffset file_offset = RVAToFileOffset(typed_rva->rva());
+    if (file_offset == kNoFileOffset)
       return false;
-
-    (*rva)->set_offset(offset);
+    typed_rva->set_file_offset(file_offset);
   }
-
   return true;
 }
 
 CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
   // Walk all the bytes in the file, whether or not in a section.
-  uint32_t file_offset = 0;
+  FileOffset file_offset = 0;
 
-  std::vector<size_t> abs_offsets;
+  std::vector<FileOffset> abs_offsets;
 
-  if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
+  if (!RVAsToFileOffsets(abs32_locations_, &abs_offsets))
     return false;
 
-  if (!RVAsToOffsets(&rel32_locations_))
+  if (!RVAsToFileOffsets(&rel32_locations_))
     return false;
 
-  std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
+  std::vector<FileOffset>::iterator current_abs_offset = abs_offsets.begin();
   ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
 
-  std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
+  std::vector<FileOffset>::iterator end_abs_offset = abs_offsets.end();
   ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
 
-  for (int section_id = 0;
-       section_id < SectionHeaderCount();
-       section_id++) {
-
-    const Elf32_Shdr *section_header = SectionHeader(section_id);
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
 
     if (section_header->sh_type == SHT_NOBITS)
       continue;
 
-    if (!ParseSimpleRegion(file_offset,
-                           section_header->sh_offset,
-                           program))
+    if (!ParseSimpleRegion(file_offset, section_header->sh_offset, program))
       return false;
+
     file_offset = section_header->sh_offset;
 
     switch (section_header->sh_type) {
@@ -280,10 +259,13 @@
         break;
       case SHT_PROGBITS:
         if (!ParseProgbitsSection(section_header,
-                                  &current_abs_offset, end_abs_offset,
-                                  &current_rel, end_rel,
-                                  program))
+                                  &current_abs_offset,
+                                  end_abs_offset,
+                                  &current_rel,
+                                  end_rel,
+                                  program)) {
           return false;
+        }
         file_offset = section_header->sh_offset + section_header->sh_size;
         break;
       case SHT_INIT_ARRAY:
@@ -292,28 +274,27 @@
         while (current_abs_offset != end_abs_offset &&
                *current_abs_offset >= section_header->sh_offset &&
                *current_abs_offset <
-               (section_header->sh_offset + section_header->sh_size)) {
+                   section_header->sh_offset + section_header->sh_size) {
           // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
-          VLOG(1) << "Skipping relocation entry for unsupported section: " <<
-            section_header->sh_type;
-          current_abs_offset++;
+          VLOG(1) << "Skipping relocation entry for unsupported section: "
+                  << section_header->sh_type;
+          ++current_abs_offset;
         }
         break;
       default:
         if (current_abs_offset != end_abs_offset &&
-               *current_abs_offset >= section_header->sh_offset &&
-               *current_abs_offset <
-               (section_header->sh_offset + section_header->sh_size))
-          VLOG(1) << "Relocation address in unrecognized ELF section: " << \
-            section_header->sh_type;
-      break;
+            *current_abs_offset >= section_header->sh_offset &&
+            *current_abs_offset <
+                section_header->sh_offset + section_header->sh_size) {
+          VLOG(1) << "Relocation address in unrecognized ELF section: "
+                  << section_header->sh_type;
+        }
+        break;
     }
   }
 
   // Rest of the file past the last section
-  if (!ParseSimpleRegion(file_offset,
-                         length(),
-                         program))
+  if (!ParseSimpleRegion(file_offset, length(), program))
     return false;
 
   // Make certain we consume all of the relocations as expected
@@ -321,34 +302,32 @@
 }
 
 CheckBool DisassemblerElf32::ParseProgbitsSection(
-    const Elf32_Shdr *section_header,
-    std::vector<size_t>::iterator* current_abs_offset,
-    std::vector<size_t>::iterator end_abs_offset,
+    const Elf32_Shdr* section_header,
+    std::vector<FileOffset>::iterator* current_abs_offset,
+    std::vector<FileOffset>::iterator end_abs_offset,
     ScopedVector<TypedRVA>::iterator* current_rel,
     ScopedVector<TypedRVA>::iterator end_rel,
     AssemblyProgram* program) {
-
   // Walk all the bytes in the file, whether or not in a section.
-  size_t file_offset = section_header->sh_offset;
-  size_t section_end = section_header->sh_offset + section_header->sh_size;
+  FileOffset file_offset = section_header->sh_offset;
+  FileOffset section_end = section_header->sh_offset + section_header->sh_size;
 
   Elf32_Addr origin = section_header->sh_addr;
-  size_t origin_offset = section_header->sh_offset;
+  FileOffset origin_offset = section_header->sh_offset;
   if (!program->EmitOriginInstruction(origin))
     return false;
 
   while (file_offset < section_end) {
-
     if (*current_abs_offset != end_abs_offset &&
         file_offset > **current_abs_offset)
       return false;
 
     while (*current_rel != end_rel &&
-           file_offset > (**current_rel)->get_offset()) {
-      (*current_rel)++;
+           file_offset > (**current_rel)->file_offset()) {
+      ++(*current_rel);
     }
 
-    size_t next_relocation = section_end;
+    FileOffset next_relocation = section_end;
 
     if (*current_abs_offset != end_abs_offset &&
         next_relocation > **current_abs_offset)
@@ -358,8 +337,8 @@
     // an Abs value, or the end of the section, so +3 to make sure there is
     // room for the full 4 byte value.
     if (*current_rel != end_rel &&
-        next_relocation > ((**current_rel)->get_offset() + 3))
-      next_relocation = (**current_rel)->get_offset();
+        next_relocation > ((**current_rel)->file_offset() + 3))
+      next_relocation = (**current_rel)->file_offset();
 
     if (next_relocation > file_offset) {
       if (!ParseSimpleRegion(file_offset, next_relocation, program))
@@ -371,28 +350,28 @@
 
     if (*current_abs_offset != end_abs_offset &&
         file_offset == **current_abs_offset) {
-      const uint8_t* p = OffsetToPointer(file_offset);
+      const uint8_t* p = FileOffsetToPointer(file_offset);
       RVA target_rva = Read32LittleEndian(p);
 
       if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
         return false;
       file_offset += sizeof(RVA);
-      (*current_abs_offset)++;
+      ++(*current_abs_offset);
       continue;
     }
 
     if (*current_rel != end_rel &&
-        file_offset == (**current_rel)->get_offset()) {
+        file_offset == (**current_rel)->file_offset()) {
       uint32_t relative_target = (**current_rel)->relative_target();
       // This cast is for 64 bit systems, and is only safe because we
       // are working on 32 bit executables.
       RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
                              relative_target);
 
-      if (! (**current_rel)->EmitInstruction(program, target_rva))
+      if (!(**current_rel)->EmitInstruction(program, target_rva))
         return false;
       file_offset += (**current_rel)->op_size();
-      (*current_rel)++;
+      ++(*current_rel);
       continue;
     }
   }
@@ -401,17 +380,19 @@
   return ParseSimpleRegion(file_offset, section_end, program);
 }
 
-CheckBool DisassemblerElf32::ParseSimpleRegion(
-    size_t start_file_offset,
-    size_t end_file_offset,
-    AssemblyProgram* program) {
+CheckBool DisassemblerElf32::ParseSimpleRegion(FileOffset start_file_offset,
+                                               FileOffset end_file_offset,
+                                               AssemblyProgram* program) {
   // Callers don't guarantee start < end
-  if (start_file_offset >= end_file_offset) return true;
+  if (start_file_offset >= end_file_offset)
+    return true;
 
   const size_t len = end_file_offset - start_file_offset;
 
-  if (!program->EmitBytesInstruction(OffsetToPointer(start_file_offset), len))
+  if (!program->EmitBytesInstruction(FileOffsetToPointer(start_file_offset),
+                                     len)) {
     return false;
+  }
 
   return true;
 }
@@ -420,12 +401,13 @@
   abs32_locations_.clear();
 
   // Loop through sections for relocation sections
-  for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
-    const Elf32_Shdr *section_header = SectionHeader(section_id);
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
 
     if (section_header->sh_type == SHT_REL) {
-
-      Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
+      const Elf32_Rel* relocs_table =
+          reinterpret_cast<const Elf32_Rel*>(SectionBody(section_id));
 
       int relocs_table_count = section_header->sh_size /
                                section_header->sh_entsize;
@@ -433,7 +415,7 @@
       // Elf32_Word relocation_section_id = section_header->sh_info;
 
       // Loop through relocation objects in the relocation section
-      for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
+      for (int rel_id = 0; rel_id < relocs_table_count; ++rel_id) {
         RVA rva;
 
         // Quite a few of these conversions fail, and we simply skip
@@ -451,23 +433,18 @@
 }
 
 CheckBool DisassemblerElf32::CheckSection(RVA rva) {
-  size_t offset;
-
-  if (!RVAToFileOffset(rva, &offset)) {
+  FileOffset file_offset = RVAToFileOffset(rva);
+  if (file_offset == kNoFileOffset)
     return false;
-  }
 
-  for (int section_id = 0;
-       section_id < SectionHeaderCount();
-       section_id++) {
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
 
-    const Elf32_Shdr *section_header = SectionHeader(section_id);
-
-    if (offset >= section_header->sh_offset &&
-        offset < (section_header->sh_offset + section_header->sh_size)) {
+    if (file_offset >= section_header->sh_offset &&
+        file_offset < (section_header->sh_offset + section_header->sh_size)) {
       switch (section_header->sh_type) {
-        case SHT_REL:
-          // Fall-through
+        case SHT_REL:  // Falls through.
         case SHT_PROGBITS:
           return true;
       }
@@ -478,16 +455,14 @@
 }
 
 CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
-
   rel32_locations_.clear();
 
   // Loop through sections for relocation sections
-  for (int section_id = 0;
-       section_id < SectionHeaderCount();
-       section_id++) {
+  for (Elf32_Half section_id = 0; section_id < SectionHeaderCount();
+       ++section_id) {
+    const Elf32_Shdr* section_header = SectionHeader(section_id);
 
-    const Elf32_Shdr *section_header = SectionHeader(section_id);
-
+    // TODO(huangs): Add better checks to skip non-code sections.
     // Some debug sections can have sh_type=SHT_PROGBITS but sh_addr=0.
     if (section_header->sh_type != SHT_PROGBITS ||
         section_header->sh_addr == 0)