old htb folders
This commit is contained in:
2023-08-29 21:53:22 +02:00
parent 62ab804867
commit 82b0759f1e
21891 changed files with 6277643 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
"""
CLE is an extensible binary loader. Its main goal is to take an executable program and any libraries it depends on and
produce an address space where that program is loaded and ready to run.
The primary interface to CLE is the Loader class.
"""
__version__ = "9.2.35"
# Refuse to import on Python 2: the check below is only true when `bytes` and `str` are the same type.
if bytes is str:
    raise Exception("This module is designed for python 3 only. Please install an older version to use python 2.")
import logging
# Attach a no-op handler to this package's logger.
logging.getLogger(name=__name__).addHandler(logging.NullHandler())
# pylint: disable=wildcard-import
from . import utils
from .loader import *
from .memory import *
from .errors import *
from .backends import *
from .backends.tls import *
from .backends.externs import *
from .patched_stream import *
from .gdb import *

View File

@@ -0,0 +1,102 @@
class AddressTranslator:
    """
    Mediates address translations between typed addresses such as RAW, RVA, LVA, MVA and VA
    including address owner and its state (linked or mapped)

    Semantics::

        owner - object associated with the address
            (any object class based on `cle.Backend`)
        owner mapping state - sparse object can be either mapped or not
            (actual object's image base VA to be considered valid)
        RAW - offset (index) inside a file stream
        VA  - address inside process flat virtual memory space
        RVA - address relative to the object's segment base
            (segment base normalized virtual address)
        LVA - linked VA (linker)
        MVA - mapped VA (loader)

    Internally only the RVA and the owner are stored; every other representation is
    computed on demand from the owner's ``linked_base`` / ``mapped_base``.
    """

    # NOTE: the docstring must be the first statement of the class body; placed after
    # ``__slots__`` it would be a plain expression and ``__doc__`` would stay ``None``.
    __slots__ = (
        "_rva",
        "_owner",
    )

    def __init__(self, rva, owner):
        """
        :param rva: virtual address relative to owner's object image base
        :type rva: int
        :param owner: The object owner address relates to
        :type owner: cle.Backend
        """
        self._rva, self._owner = rva, owner

    @classmethod
    def from_lva(cls, lva, owner):
        """
        Loads address translator with LVA (subtracts ``owner.linked_base``).
        """
        return cls(lva - owner.linked_base, owner)

    @classmethod
    def from_mva(cls, mva, owner):
        """
        Loads address translator with MVA (subtracts ``owner.mapped_base``).
        """
        return cls(mva - owner.mapped_base, owner)

    @classmethod
    def from_rva(cls, rva, owner):
        """
        Loads address translator with RVA (stored unchanged).
        """
        return cls(rva, owner)

    @classmethod
    def from_raw(cls, raw, owner):
        """
        Loads address translator with RAW address

        The file offset is converted through ``owner.offset_to_addr`` and then normalised
        against the mapped base when the owner is mapped, otherwise against the linked base.
        """
        base = owner.mapped_base if owner._is_mapped else owner.linked_base
        return cls(owner.offset_to_addr(raw) - base, owner)

    # Long-form aliases of the constructors above.
    from_linked_va = from_lva
    from_va = from_mapped_va = from_mva
    from_relative_va = from_rva

    def to_lva(self):
        """
        RVA -> LVA

        :rtype: int
        """
        return self._rva + self._owner.linked_base

    def to_mva(self):
        """
        RVA -> MVA

        :rtype: int
        """
        return self._rva + self._owner.mapped_base

    def to_rva(self):
        """
        RVA -> RVA

        :rtype: int
        """
        return self._rva

    def to_raw(self):
        """
        RVA -> RAW

        The RVA is rebased onto the mapped base when the owner is mapped, otherwise onto the
        linked base, and then handed to ``owner.addr_to_offset``.

        :rtype: int
        """
        owner = self._owner
        base = owner.mapped_base if owner._is_mapped else owner.linked_base
        return owner.addr_to_offset(self._rva + base)

    # Long-form aliases of the converters above.
    to_linked_va = to_lva
    to_va = to_mapped_va = to_mva
    to_relative_va = to_rva


AT = AddressTranslator

View File

@@ -0,0 +1,558 @@
# pylint:disable=wrong-import-position
import os
import logging
import hashlib
from io import BufferedReader
from typing import List, Optional # pylint:disable=unused-import
import typing
import sortedcontainers
import archinfo
from .region import Region, Segment, Section
from .regions import Regions
from .symbol import Symbol, SymbolType
from ..address_translator import AT
from ..memory import Clemory
from ..errors import CLEOperationError, CLEError
l = logging.getLogger(name=__name__)
if typing.TYPE_CHECKING:
from .. import Loader, Relocation
class FunctionHintSource:
    """
    Integer constants naming where a function hint came from.
    """

    # EH_FRAME is 0, EXTERNAL_EH_FRAME is 1.
    EH_FRAME, EXTERNAL_EH_FRAME = range(2)
class FunctionHint:
    """
    A hint that a function exists at a given location.

    :ivar int addr:     Address of the function.
    :ivar int size:     Size of the function.
    :ivar source:       Source of this hint.
    :vartype source:    int
    """

    __slots__ = ("addr", "size", "source")

    def __init__(self, addr, size, source):
        self.addr, self.size, self.source = addr, size, source

    def __repr__(self):
        shown = (self.addr, self.size)
        return "<FuncHint@%#x, %d bytes>" % shown
class ExceptionHandling:
    """
    One exception-handling record: a guarded ("try") range and, optionally, its handler.

    Exception handlers are usually language-specific. In C++, it is usually implemented as try {} catch {} blocks.

    :ivar int start_addr:               The beginning of the try block.
    :ivar int size:                     Size of the try block.
    :ivar Optional[int] handler_addr:   Address of the exception handler code.
    :ivar type:                         Type of the exception handler. Optional.
    :ivar Optional[int] func_addr:      Address of the function. Optional.
    """

    __slots__ = (
        "start_addr",
        "size",
        "handler_addr",
        "type",
        "func_addr",
    )

    def __init__(self, start_addr, size, handler_addr=None, type_=None, func_addr=None):
        # Mandatory description of the guarded range.
        self.start_addr, self.size = start_addr, size
        # Optional details; each stays None when not supplied.
        self.handler_addr, self.type, self.func_addr = handler_addr, type_, func_addr

    def __repr__(self):
        end_addr = self.start_addr + self.size
        if self.handler_addr is None:
            return "<ExceptionHandling@{:#x}-{:#x}: no handler>".format(self.start_addr, end_addr)
        return "<ExceptionHandling@{:#x}-{:#x}: handler@{:#x}>".format(self.start_addr, end_addr, self.handler_addr)
class Backend:
    """
    Main base class for CLE binary objects.

    An alternate interface to this constructor exists as the static method :meth:`cle.loader.Loader.load_object`

    :ivar binary:           The path to the file this object is loaded from
    :ivar binary_basename:  The basename of the filepath, or a short representation of the stream it was loaded from
    :ivar is_main_bin:      Whether this binary is loaded as the main executable
    :ivar segments:         A listing of all the loaded segments in this file
    :ivar sections:         A listing of all the demarked sections in the file
    :ivar sections_map:     A dict mapping from section name to section
    :ivar imports:          A mapping from symbol name to import relocation
    :ivar resolved_imports: A list of all the import symbols that are successfully resolved
    :ivar relocs:           A list of all the relocations in this binary
    :ivar irelatives:       A list of tuples representing all the irelative relocations that need to be performed. The
                            first item in the tuple is the address of the resolver function, and the second item is the
                            address of where to write the result. The destination address is an RVA.
    :ivar jmprel:           A mapping from symbol name to the address of its jump slot relocation, i.e. its GOT entry.
    :ivar arch:             The architecture of this binary
    :vartype arch:          archinfo.arch.Arch
    :ivar str os:           The operating system this binary is meant to run under
    :ivar int mapped_base:  The base address of this object in virtual memory
    :ivar deps:             A list of names of shared libraries this binary depends on
    :ivar linking:          'dynamic' or 'static'
    :ivar linked_base:      The base address this object requests to be loaded at
    :ivar bool pic:         Whether this object is position-independent
    :ivar bool execstack:   Whether this executable has an executable stack
    :ivar str provides:     The name of the shared library dependancy that this object resolves
    :ivar list symbols:     A list of symbols provided by this object, sorted by address
    :ivar has_memory:       Whether this backend is backed by a Clemory or not. As it stands now, a backend should still
                            define `min_addr` and `max_addr` even if `has_memory` is False.
    """

    is_default = False
    loader: "Loader"

    def __init__(
        self,
        binary,
        binary_stream,
        loader=None,
        is_main_bin=False,
        entry_point=None,
        arch=None,
        base_addr=None,
        force_rebase=False,
        has_memory=True,
        **kwargs,
    ):
        """
        :param binary:          The path to the binary to load
        :param binary_stream:   The open stream to this binary. The reference to this will be held until you call close.
        :param loader:          The loader this object belongs to, stored as ``self.loader``.
        :param is_main_bin:     Whether this binary should be loaded as the main executable
        :param entry_point:     Optional custom entry point; when set it overrides the object's own entry.
        :param arch:            ``None``, an arch identifier string, an ``archinfo.Arch`` instance, or an
                                ``archinfo.Arch`` subclass.
        :param base_addr:       Optional custom base address, stored as ``self._custom_base_addr``.
        :param force_rebase:    Initial value of ``self.pic``.
        :param has_memory:      Stored as ``self.has_memory``.
        :param kwargs:          The deprecated names ``custom_entry_point``, ``custom_arch`` and
                                ``custom_base_addr`` are accepted (with a log message); anything else is
                                reported as unused.
        :raises CLEError:       If ``arch`` is of an unsupported type.
        """
        self.binary = binary
        self._binary_stream: BufferedReader = binary_stream
        if self.binary is not None:
            self.binary_basename = os.path.basename(self.binary)
        elif hasattr(self._binary_stream, "name"):
            self.binary_basename = os.path.basename(self._binary_stream.name)
        else:
            self.binary_basename = str(self._binary_stream)

        # Translate the deprecated "custom_*" spellings into their modern parameters.
        # Iterate over a snapshot of the keys because entries are popped along the way.
        for k in list(kwargs.keys()):
            if k == "custom_entry_point":
                entry_point = kwargs.pop(k)
            elif k == "custom_arch":
                arch = kwargs.pop(k)
            elif k == "custom_base_addr":
                base_addr = kwargs.pop(k)
            else:
                continue
            # k[7:] strips the leading "custom_" to show the new parameter name.
            l.critical("Deprecation warning: the %s parameter has been renamed to %s", k, k[7:])

        if kwargs:
            l.warning("Unused kwargs for loading binary %s: %s", self.binary, ", ".join(kwargs.keys()))

        self.is_main_bin = is_main_bin
        self.has_memory = has_memory
        self.loader = loader
        self._entry = 0
        self._segments = Regions()  # List of segments
        self._sections = Regions()  # List of sections
        self.sections_map = {}  # Mapping from section name to section
        self.symbols: "sortedcontainers.SortedKeyList[Symbol]" = sortedcontainers.SortedKeyList(
            key=self._get_symbol_relative_addr
        )
        self.imports: typing.Dict[str, "Relocation"] = {}
        self.resolved_imports = []
        self.relocs: "List[Relocation]" = []
        self.irelatives = []  # list of tuples (resolver, destination), dest w/o rebase
        self.jmprel = {}
        self.arch = None  # type: Optional[archinfo.Arch]
        self.os = None  # Let other stuff override this
        self.compiler = None, None  # compiler name, version
        self._symbol_cache = {}
        # a list of directories to search for libraries specified by the object
        self.extra_load_path = []
        # attributes to enable SimProcedure guessing
        self.guess_simprocs = False
        self.guess_simprocs_hint = None

        # checksums
        self.md5 = None
        self.sha256 = None

        self.mapped_base_symbolic = 0
        # These are set by cle, and should not be overriden manually
        self.mapped_base = self.linked_base = 0  # not to be set manually - used by CLE

        self.deps = []  # Needed shared objects (libraries dependencies)
        self.child_objects = []  # any objects loaded directly out of this
        self.parent_object = None
        self.linking = None  # Dynamic or static linking
        self.pic = force_rebase
        self.execstack = False

        # tls info set by backend to communicate with thread manager
        self.tls_used = False
        self.tls_block_size = None
        self.tls_data_size = None
        self.tls_data_start = None
        # tls info set by thread manager
        self.tls_module_id = None
        # self.tls_block_offset = None  # this is an ELF-only attribute

        # exception handling
        # they should be rebased when .rebase() is called
        self.exception_handlings = []  # type: List[ExceptionHandling]

        # Hints
        # they should be rebased when .rebase() is called
        self.function_hints = []  # type: List[FunctionHint]

        # line number mapping
        self.addr_to_line = {}

        # Custom options
        self._custom_entry_point = entry_point
        self._custom_base_addr = base_addr
        self.provides = os.path.basename(self.binary) if self.binary is not None else None

        self.memory = None  # type: Clemory

        # should be set inside `cle.Loader.add_object`
        self._is_mapped = False
        # cached max_addr
        self._max_addr = None
        # cached last section
        self._last_section = None
        # cached last segment
        self._last_segment = None

        # self.arch is already None at this point; only the non-None forms need handling.
        if arch is not None:
            if isinstance(arch, str):
                self.set_arch(archinfo.arch_from_id(arch))
            elif isinstance(arch, archinfo.Arch):
                self.set_arch(arch)
            elif isinstance(arch, type) and issubclass(arch, archinfo.Arch):
                self.set_arch(arch())
            else:
                raise CLEError("Bad parameter: arch=%s" % arch)

        self._checksum()

    def close(self):
        """
        Drop this object's reference to the binary stream.
        """
        del self._binary_stream

    def __repr__(self):
        return "<%s Object %s, maps [%#x:%#x]>" % (
            self.__class__.__name__,
            self.binary_basename,
            self.min_addr,
            self.max_addr,
        )

    def set_arch(self, arch):
        """
        Set the architecture of this object and create a fresh ``Clemory`` for it.
        """
        self.arch = arch
        self.memory = Clemory(arch)  # Private virtual address space, without relocations

    @property
    def image_base_delta(self):
        """
        The difference between the mapped base and the linked base.
        """
        return self.mapped_base - self.linked_base

    @property
    def entry(self):
        """
        The entry point as an MVA; the custom entry point takes precedence when one was given.
        Both values are interpreted as LVAs.
        """
        if self._custom_entry_point is not None:
            return AT.from_lva(self._custom_entry_point, self).to_mva()
        return AT.from_lva(self._entry, self).to_mva()

    @property
    def segments(self) -> Regions:
        return self._segments

    @segments.setter
    def segments(self, v):
        """
        Accepts either a plain list (wrapped into ``Regions``) or a ``Regions`` instance.

        :raises ValueError: For any other type.
        """
        if isinstance(v, list):
            self._segments = Regions(lst=v)
        elif isinstance(v, Regions):
            self._segments = v
        else:
            raise ValueError("Unsupported type %s set as segments." % type(v))

    @property
    def sections(self):
        return self._sections

    @sections.setter
    def sections(self, v):
        """
        Accepts either a plain list (wrapped into ``Regions``) or a ``Regions`` instance.

        :raises ValueError: For any other type.
        """
        if isinstance(v, list):
            self._sections = Regions(lst=v)
        elif isinstance(v, Regions):
            self._sections = v
        else:
            raise ValueError("Unsupported type %s set as sections." % type(v))

    @property
    def symbols_by_addr(self):
        """
        Deprecated. Builds a dict from each symbol's ``rebased_addr`` to the symbol on every access.
        """
        l.critical(
            "Deprecation warning: symbols_by_addr is deprecated - use loader.find_symbol() for lookup "
            "and .symbols for enumeration"
        )
        return {s.rebased_addr: s for s in self.symbols}

    def rebase(self, new_base):
        """
        Rebase backend's regions to the new base where they were mapped by the loader

        :param new_base:            The new mapped base.
        :raises CLEOperationError:  If the object is already marked as mapped.
        """
        if self._is_mapped:
            # we could rebase an object twice if we really wanted... no need though, right?
            raise CLEOperationError(f"Image already rebased from {self.linked_base:#x} to {self.mapped_base:#x}")

        self.mapped_base = new_base
        # The delta only depends on mapped_base/linked_base, so compute it once.
        delta = self.image_base_delta

        if self.sections:
            self.sections._rebase(delta)
        # Some backends use one and the same Regions object for both; do not rebase it twice.
        if self.segments and self.sections is not self.segments:
            self.segments._rebase(delta)

        for handling in self.exception_handlings:
            if handling.func_addr is not None:
                handling.func_addr += delta
            if handling.handler_addr is not None:
                handling.handler_addr += delta
            handling.start_addr += delta

        for hint in self.function_hints:
            hint.addr = hint.addr + delta

    def relocate(self):
        """
        Apply all resolved relocations to memory.

        The meaning of "resolved relocations" is somewhat subtle - there is a linking step which attempts to resolve
        each relocation, currently only present in the main internal loading function since the calculation of which
        objects should be available
        """
        for reloc in self.relocs:
            if reloc.resolved:
                reloc.relocate()

    def contains_addr(self, addr):
        """
        Is `addr` in one of the binary's segments/sections we have loaded? (i.e. is it mapped into memory ?)
        """
        return self.find_loadable_containing(addr) is not None

    def find_loadable_containing(self, addr):
        """
        Look `addr` up in the segments when there are any, otherwise in the sections.
        """
        lookup = self.find_segment_containing if self.segments else self.find_section_containing
        return lookup(addr)

    def find_segment_containing(self, addr):
        """
        Returns the segment that contains `addr`, or ``None``.
        """
        # Fast path: the most recent hit is cached.
        if self._last_segment is not None and self._last_segment.contains_addr(addr):
            return self._last_segment

        r = self.segments.find_region_containing(addr)
        if r is not None:
            self._last_segment = r
        return r

    def find_section_containing(self, addr):
        """
        Returns the section that contains `addr` or ``None``.
        """
        # Fast path: the most recent hit is cached.
        if self._last_section is not None and self._last_section.contains_addr(addr):
            return self._last_section

        r = self.sections.find_region_containing(addr)
        if r is not None:
            self._last_section = r
        return r

    def addr_to_offset(self, addr):
        """
        Convert `addr` to a file offset via the loadable region containing it, or return ``None`` when no
        region contains it.
        """
        loadable = self.find_loadable_containing(addr)
        if loadable is None:
            return None
        return loadable.addr_to_offset(addr)

    def offset_to_addr(self, offset):
        """
        Convert a file offset to an address via the first region containing it, or return ``None``.
        Segments are searched when there are any, otherwise sections.
        """
        for s in self.segments or self.sections:
            if s.contains_offset(offset):
                return s.offset_to_addr(offset)
        return None

    @property
    def min_addr(self):
        """
        This returns the lowest virtual address contained in any loaded segment of the binary.
        """
        # Loader maps the object at chosen mapped base anyway and independently of the internal structure
        return self.mapped_base

    @property
    def max_addr(self):
        """
        This returns the highest virtual address contained in any loaded segment of the binary.
        """
        if self._max_addr is None:
            out = self.mapped_base
            regions = self.segments or self.sections
            if regions:
                out = max(x.max_addr for x in regions)
            # Cache the value relative to the mapped base so it stays valid if the base changes.
            self._max_addr = out - self.mapped_base
        return self._max_addr + self.mapped_base

    @property
    def initializers(self):  # pylint: disable=no-self-use
        """
        Stub function. Should be overridden by backends that can provide initializer functions that ought to be run
        before execution reaches the entry point. Addresses should be rebased.
        """
        return []

    @property
    def finalizers(self):  # pylint: disable=no-self-use
        """
        Stub function. Like initializers, but with finalizers.
        """
        return []

    @property
    def threads(self):  # pylint: disable=no-self-use
        """
        If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e.
        register files. This property should contain a list of names for these threads, which should be unique.
        """
        return []

    def thread_registers(self, thread=None):  # pylint: disable=no-self-use,unused-argument
        """
        If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e.
        register files. This method should return the register file for a given thread (as named in ``Backend.threads``)
        as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should
        return the context for a "default" thread. If there are no threads, it should return an empty dict.
        """
        return {}

    def initial_register_values(self):
        """
        Deprecated
        """
        l.critical("Deprecation warning: initial_register_values is deprecated - use backend.thread_registers() instead")
        return self.thread_registers().items()

    def get_symbol(self, name):  # pylint: disable=no-self-use,unused-argument
        """
        Stub function. Implement to find the symbol with name `name`.

        The base implementation only consults the symbol cache and returns ``None`` on a miss.
        """
        return self._symbol_cache.get(name)

    @staticmethod
    def extract_soname(path):  # pylint: disable=unused-argument
        """
        Extracts the shared object identifier from the path, or returns None if it cannot.
        """
        return None

    @classmethod
    def is_compatible(cls, stream):  # pylint:disable=unused-argument
        """
        Determine quickly whether this backend can load an object from this stream
        """
        return False

    @classmethod
    def check_compatibility(cls, spec, obj):  # pylint: disable=unused-argument
        """
        Performs a minimal static load of ``spec`` and returns whether it's compatible with other_obj
        """
        return False

    @classmethod
    def check_magic_compatibility(cls, stream):  # pylint: disable=unused-argument
        """
        Check if a stream of bytes contains the same magic number as the main object
        """
        return False

    @staticmethod
    def _get_symbol_relative_addr(symbol):
        # Sort key for ``self.symbols``.
        return symbol.relative_addr

    def _checksum(self):
        """
        Calculate MD5 and SHA256 checksum for the binary.

        Reads the stream from its current position to the end and then seeks back to offset 0.
        Does nothing when there is no stream.
        """
        if self._binary_stream is not None:
            data = self._binary_stream.read()
            self._binary_stream.seek(0)
            self.md5 = hashlib.md5(data).digest()
            self.sha256 = hashlib.sha256(data).digest()

    def __getstate__(self):
        # Store the symbols as a plain list; __setstate__ rebuilds the SortedKeyList from it.
        state = self.__dict__.copy()
        state["symbols"] = list(state["symbols"])
        return state

    def __setstate__(self, state):
        state["symbols"] = sortedcontainers.SortedKeyList(state["symbols"], key=self._get_symbol_relative_addr)
        self.__dict__.update(state)
        # Re-attach every symbol to this (restored) object.
        for sym in self.symbols:
            sym.owner = self
# Registry of backend classes, keyed by backend name.
ALL_BACKENDS = {}


def register_backend(name, cls):
    """
    Record `cls` in ``ALL_BACKENDS`` under `name`, replacing any previous entry of that name.
    """
    ALL_BACKENDS[name] = cls
from .elf import ELF, ELFCore, MetaELF
from .pe import PE
# from .idabin import IDABin
from .blob import Blob
from .cgc import CGC, BackedCGC
from .ihex import Hex
from .minidump import Minidump
from .macho import MachO
from .named_region import NamedRegion
from .java.jar import Jar
from .java.apk import Apk
from .java.soot import Soot
from .xbe import XBE
from .static_archive import StaticArchive
# BinjaBin is not imported by default since importing it is too slow
# you may manually import it by running `from cle.backends.binja import BinjaBin`

View File

@@ -0,0 +1,254 @@
import logging
from . import Backend, register_backend, Symbol, SymbolType
from .relocation import Relocation
from ..errors import CLEError
from ..address_translator import AT
import archinfo
l = logging.getLogger(name=__name__)
try:
import binaryninja as bn
except ImportError:
bn = None
l.debug("Unable to import binaryninja module")
BINJA_NOT_INSTALLED_STR = "Binary Ninja does not appear to be installed. Please ensure Binary Ninja \
and its Python API are properly installed before using this backend."
class BinjaSymbol(Symbol):
    """
    CLE symbol built from a Binary Ninja symbol.

    The three class-level lists classify Binary Ninja symbol types; they are empty when the
    ``binaryninja`` module could not be imported.
    """

    BINJA_FUNC_SYM_TYPES = (
        [bn.SymbolType.ImportedFunctionSymbol, bn.SymbolType.FunctionSymbol, bn.SymbolType.ImportAddressSymbol]
        if bn
        else []
    )
    BINJA_DATA_SYM_TYPES = [bn.SymbolType.ImportedDataSymbol, bn.SymbolType.DataSymbol] if bn else []
    BINJA_IMPORT_TYPES = (
        [bn.SymbolType.ImportedFunctionSymbol, bn.SymbolType.ImportAddressSymbol, bn.SymbolType.ImportedDataSymbol]
        if bn
        else []
    )

    def __init__(self, owner, sym):
        """
        :param owner:       The backend object this symbol belongs to (its ``bv.address_size`` is used as the size).
        :param sym:         The Binary Ninja symbol to wrap.
        :raises CLEError:   If Binary Ninja is not available.
        """
        if not bn:
            raise CLEError(BINJA_NOT_INSTALLED_STR)

        # Map the Binary Ninja symbol type onto CLE's coarser classification.
        symtype = SymbolType.TYPE_OTHER
        if sym.type in self.BINJA_FUNC_SYM_TYPES:
            symtype = SymbolType.TYPE_FUNCTION
        elif sym.type in self.BINJA_DATA_SYM_TYPES:
            symtype = SymbolType.TYPE_OBJECT

        relative_addr = AT.from_rva(sym.address, owner).to_rva()
        super().__init__(owner, sym.raw_name, relative_addr, owner.bv.address_size, symtype)

        if sym.type in self.BINJA_IMPORT_TYPES:
            self.is_import = True
        # TODO: set is_weak appropriately
class BinjaReloc(Relocation):
    """
    Relocation used by the Binary Ninja backend.
    """

    @property
    def value(self):
        """
        The relocation's value, which for this backend is simply its ``relative_addr``.
        """
        return self.relative_addr
class BinjaBin(Backend):
    """
    Get information from binaries using Binary Ninja. Basing this on idabin.py, but will try to be more complete.
    TODO: add more features as Binary Ninja's feature set improves
    """

    is_default = True  # Tell CLE to automatically consider using the BinjaBin backend

    # Maps Binary Ninja architecture names to archinfo architecture instances.
    BINJA_ARCH_MAP = {
        "aarch64": archinfo.ArchAArch64(endness="Iend_LE"),
        "armv7": archinfo.ArchARMEL(endness="Iend_LE"),
        "thumb2": archinfo.ArchARMEL(endness="Iend_LE"),
        "armv7eb": archinfo.ArchARMEL(endness="Iend_BE"),
        "thumb2eb": archinfo.ArchARMEL(endness="Iend_BE"),
        "mipsel32": archinfo.ArchMIPS32(endness="Iend_LE"),
        "mips32": archinfo.ArchMIPS32(endness="Iend_BE"),
        "ppc": archinfo.ArchPPC32(endness="Iend_BE"),
        "ppc_le": archinfo.ArchPPC32(endness="Iend_LE"),
        "x86": archinfo.ArchX86(),
        "x86_64": archinfo.ArchAMD64(),
    }

    def __init__(self, binary, *args, **kwargs):
        """
        :param binary:      Path handed to Binary Ninja's ``get_view_of_file``.
        :raises CLEError:   If Binary Ninja is not installed, or if the view's architecture is not in
                            ``BINJA_ARCH_MAP`` and no architecture was supplied by the caller either.
        """
        super().__init__(binary, *args, **kwargs)
        if not bn:
            raise CLEError(BINJA_NOT_INSTALLED_STR)
        # get_view_of_file can take a bndb or binary - wait for autoanalysis to complete
        self.bv = bn.BinaryViewType.get_view_of_file(binary, False)
        l.info("Analyzing %s, this may take some time...", binary)
        self.bv.update_analysis_and_wait()
        l.info("Analysis complete")
        # Note may want to add option to kick off linear sweep

        try:
            self.set_arch(self.BINJA_ARCH_MAP[self.bv.arch.name])
        except KeyError:
            l.error("Architecture %s is not supported.", self.bv.arch.name)
            if self.arch is None:
                # Without an architecture there is no memory object either (set_arch creates it),
                # so everything below would fail on None. Fail here with a meaningful error instead.
                raise CLEError("Architecture %s is not supported." % self.bv.arch.name) from None

        for seg in self.bv.segments:
            l.info("Adding memory for segment at %x.", seg.start)
            br = bn.BinaryReader(self.bv)
            br.seek(seg.start)
            data = br.read(len(seg))
            self.memory.add_backer(seg.start, data)

        self._find_got()
        self._symbol_cache = {}
        self._init_symbol_cache()
        # Note: this represents the plt stub. ImportAddressSymbol refers to .got entries
        # Since we're not trying to import and load dependencies directly, but want to run SimProcedures,
        # We should use the binaryninja.SymbolType.ImportedFunctionSymbol
        # Also this should be generalized to get data imports, too
        self.raw_imports = {
            i.name: i.address for i in self.bv.get_symbols_of_type(bn.SymbolType.ImportedFunctionSymbol)
        }
        self._process_imports()
        self.exports = {}
        self.linking = "dynamic" if self.raw_imports else "static"
        # We'll look for this attribute to see if we need to do SimProcedures for any imports in this binary
        # This is an ugly hack, but will have to use this for now until Binary Ninja exposes dependencies
        self.guess_simprocs = True
        self.guess_simprocs_hint = "nix" if self.bv.get_section_by_name(".plt") else "win"

        l.warning(
            "This backend is based on idabin.py.\n"
            "You may encounter unexpected behavior if:\n"
            "\tyour target depends on library data symbol imports, or\n"
            "\tlibrary imports that don't have a guess-able SimProcedure\n"
            "Good luck!"
        )

    def _process_imports(self):
        """Process self.raw_imports into list of Relocation objects"""
        if not self.raw_imports:
            l.warning("No imports found - if this is a dynamically-linked binary, something probably went wrong.")

        for name, addr in self.raw_imports.items():
            # NOTE(review): the relocation object is not stored here; presumably the Relocation
            # constructor registers itself with the owner - confirm against Relocation.__init__.
            BinjaReloc(self, self._symbol_cache[name], addr)

    def _init_symbol_cache(self):
        """
        Wrap every symbol of the binary view in a BinjaSymbol and record it in both the
        name cache (keyed by raw name) and ``self.symbols``.
        """
        # Note that we could also access name, short_name, or full_name attributes
        for sym in self.bv.get_symbols():
            cle_sym = BinjaSymbol(self, sym)
            self._symbol_cache[sym.raw_name] = cle_sym
            self.symbols.add(cle_sym)

    def _find_got(self):
        """
        Locate the section (e.g. .got) that should be updated when relocating functions (that's where we want to
        write absolute addresses).

        Sets ``self.got_begin`` / ``self.got_end`` (both ``None`` when the section is missing).

        :returns: True if the section was found, False otherwise.
        """
        sec_name = self.arch.got_section_name
        self.got_begin = None
        self.got_end = None

        try:
            got_sec = self.bv.sections[sec_name]
            self.got_begin = got_sec.start
            self.got_end = got_sec.end
        except KeyError:
            l.warning("No got section mapping found!")

        # If we reach this point, we should have the addresses
        if self.got_begin is None or self.got_end is None:
            l.warning("No section %s, is this a static binary ? (or stripped)", sec_name)
            return False
        return True

    @staticmethod
    def is_compatible(stream):
        """
        Only Binary Ninja databases are claimed automatically: the stream must start with the SQLite 3
        magic and its name must end in "bndb". Always False when Binary Ninja is not installed.
        """
        if not bn:
            return False
        magic = stream.read(100)
        stream.seek(0)
        # bndb files are SQlite 3
        # Not every stream has a name (e.g. in-memory streams), and a name need not be a string.
        stream_name = str(getattr(stream, "name", ""))
        return magic.startswith(b"SQLite format 3") and stream_name.endswith("bndb")

    def in_which_segment(self, addr):
        """
        Return the segment name at address `addr`.

        Returns "unknown" when no section covers `addr` or when the section's name is empty.
        """
        # WARNING: if there are overlapping sections, we choose the first name.
        # The only scenario I've seen here is a NOBITS section that "overlaps" with another one, but
        # I'm not sure if that's a heurstic that should be applied here.
        # https://stackoverflow.com/questions/25501044/gcc-ld-overlapping-sections-tbss-init-array-in-statically-linked-elf-bin#25771838
        sections = self.bv.get_sections_at(addr)
        if not sections:
            # Indexing [0] on an empty result would raise IndexError.
            return "unknown"
        seg = sections[0].name
        return "unknown" if len(seg) == 0 else seg

    def get_symbol_addr(self, sym):
        """
        Look up the symbol `sym` in Binary Ninja.

        :returns: Whatever ``bv.get_symbol_by_raw_name`` returns.
                  NOTE(review): despite the method name this looks like a symbol object rather than an
                  address - confirm against the Binary Ninja API before relying on it.
        """
        # sym is assumed to be the raw_name of the symbol
        return self.bv.get_symbol_by_raw_name(sym)

    def function_name(self, addr):
        """
        Return the function name at address `addr`, or "UNKNOWN" when there is no function there.
        """
        func = self.bv.get_function_at(addr)
        if not func:
            return "UNKNOWN"
        return func.name

    @property
    def min_addr(self):
        """
        Get the min address of the binary. (note: this is probably not "right")
        """
        return self.bv.start

    @property
    def max_addr(self):
        """
        Get the max address of the binary.
        """
        return self.bv.end

    @property
    def entry(self):
        """
        The entry point (custom one first, otherwise the binary view's) offset by ``mapped_base``.
        """
        if self._custom_entry_point is not None:
            return self._custom_entry_point + self.mapped_base
        return self.bv.entry_point + self.mapped_base

    def get_strings(self):
        """
        Extract strings from binary (Binary Ninja).

        :returns:   An array of strings.
        """
        return self.bv.get_strings()

    def set_got_entry(self, name, newaddr):
        """
        Resolve import `name` with address `newaddr`. That is, update the GOT entry for `name` with `newaddr`.
        """
        if name not in self.imports:
            l.warning("%s not in imports", name)
            return

        # NOTE(review): Backend declares `imports` as a mapping to Relocation objects, yet the value is
        # used as an address here - confirm this is what pack_word expects.
        addr = self.imports[name]
        self.memory.pack_word(addr, newaddr)

    def close(self):
        """
        Release the BinaryView we created in __init__

        :return: None
        """
        self.bv.file.close()


register_backend("binja", BinjaBin)

View File

@@ -0,0 +1,115 @@
from . import Backend, register_backend
from ..errors import CLEError
from .region import Segment
import logging
l = logging.getLogger(name=__name__)
__all__ = ("Blob",)
class Blob(Backend):
    """
    Representation of a binary blob, i.e. an executable in an unknown file format.
    """

    is_default = True  # Tell CLE to automatically consider using the Blob backend

    def __init__(self, *args, offset=None, segments=None, **kwargs):
        """
        :param arch:        (required) an :class:`archinfo.Arch` for the binary blob.
        :param offset:      Skip this many bytes from the beginning of the file.
        :param segments:    List of tuples describing how to map data into memory. Tuples
                            are of ``(file_offset, mem_addr, size)``.

        You can't specify both ``offset`` and ``segments``.

        :raises CLEError:   If no architecture was specified.
        """

        if "custom_offset" in kwargs:
            offset = kwargs.pop("custom_offset")
            l.critical("Deprecation warning: the custom_offset parameter has been renamed to offset")
        super().__init__(*args, **kwargs)

        if self.arch is None:
            raise CLEError("Must specify arch when loading blob!")

        if self._custom_entry_point is None:
            l.warning("No entry_point was specified for blob %s, assuming 0", self.binary_basename)

        self._entry = 0
        self._max_addr = 0
        self._min_addr = 2**64

        # Backend.__init__ has already collected the base address from every spelling it accepts
        # (keyword, positional, or the deprecated ``custom_base_addr``), so read it from there
        # instead of looking only at this method's own ``kwargs``. An explicit ``None`` is treated
        # like "not specified" rather than becoming the linked base.
        if self._custom_base_addr is not None:
            self.linked_base = self._custom_base_addr
        else:
            l.warning("No base_addr was specified for blob %s, assuming 0", self.binary_basename)

        self.mapped_base = self.linked_base

        self.os = "unknown"

        if segments is None:
            # Map the whole file (minus an optional leading offset) as one segment at the linked base.
            start = 0 if offset is None else offset
            self._binary_stream.seek(0, 2)
            segments = [(start, self.linked_base, self._binary_stream.tell() - start)]
        elif offset is not None:
            l.error("You can't specify both offset and segments. Taking only the segments data")

        for file_offset, mem_addr, size in segments:
            self._load(file_offset, mem_addr, size)

    @staticmethod
    def is_compatible(stream):
        return stream == 0  # I hate pylint

    @property
    def min_addr(self):
        """
        The lowest memory address of any segment loaded so far.
        """
        return self._min_addr

    @property
    def max_addr(self):
        """
        The highest address covered by the data loaded so far.
        """
        return self._max_addr

    def _load(self, file_offset, mem_addr, size):
        """
        Load a segment into memory.

        :param file_offset: Where in the stream the data starts.
        :param mem_addr:    The address the data is to be placed at.
        :param size:        How many bytes to read.
        """

        self._binary_stream.seek(file_offset)
        string = self._binary_stream.read(size)
        # Nothing is recorded when no data could be read.
        if string:
            # The backer is placed relative to the linked base.
            self.memory.add_backer(mem_addr - self.linked_base, string)
            seg = Segment(file_offset, mem_addr, size, size)
            self.segments.append(seg)
            self._max_addr = max(len(string) + mem_addr - 1, self._max_addr)
            self._min_addr = min(mem_addr, self._min_addr)

    def function_name(self, addr):  # pylint: disable=unused-argument,no-self-use
        """
        Blobs don't support function names.
        """
        return None

    def contains_addr(self, addr):
        """
        Whether `addr`, taken relative to the mapped base, is present in this blob's memory.
        """
        return addr >= self.mapped_base and (addr - self.mapped_base) in self.memory

    def in_which_segment(self, addr):  # pylint: disable=unused-argument,no-self-use
        """
        Blobs don't support segments.
        """
        return None

    @classmethod
    def check_compatibility(cls, spec, obj):  # pylint: disable=unused-argument
        return True

    def _checksum(self):
        # Blobs deliberately skip checksum computation.
        return


register_backend("blob", Blob)

View File

@@ -0,0 +1,2 @@
from .cgc import CGC
from .backedcgc import BackedCGC

View File

@@ -0,0 +1,89 @@
from .cgc import CGC
from .. import register_backend
from ..region import Segment
class FakeSegment(Segment):
    """
    A readable, writable, non-executable segment constructed from just a start address and a size.
    """

    def __init__(self, start, size):
        # The first and third Segment arguments are passed as 0.
        super().__init__(0, start, 0, size)
        self.is_readable, self.is_writable, self.is_executable = True, True, False
class BackedCGC(CGC):
    """
    This is a backend for CGC executables that allows user provide a memory backer and a register backer as the
    initial state of the running binary.
    """

    is_default = True  # Tell CLE to automatically consider using the BackedCGC backend

    def __init__(
        self,
        *args,
        memory_backer=None,
        register_backer=None,
        writes_backer=None,
        permissions_map=None,
        current_allocation_base=None,
        **kwargs,
    ):
        """
        :param path:                    File path to CGC executable.
        :param memory_backer:           A dict of memory content, with beginning address of each segment as key and
                                        actual memory content as data.
        :param register_backer:         A dict of all register contents. EIP will be used as the entry point of this
                                        executable.
        :param writes_backer:           Stored as ``self.writes_backer``; not otherwise used here.
        :param permissions_map:         A dict of memory region to permission flags
        :param current_allocation_base: An integer representing the current address of the top of the CGC heap.
        :raises ValueError:             If the binary has no executable segment, or if a backer's start address is
                                        already present in memory.
        """
        super().__init__(*args, **kwargs)

        self.memory_backer = memory_backer
        self.register_backer = register_backer
        self.writes_backer = writes_backer
        self.permissions_map = permissions_map
        self.current_allocation_base = current_allocation_base

        # Find the first executable segment; its backer is the only original one that is kept.
        for seg in self.segments:
            if seg.is_executable:
                exec_seg_addr = seg.vaddr
                break
        else:
            raise ValueError("Couldn't find executable segment?")

        # Iterate over a snapshot: backers are removed inside the loop, and removing from a
        # collection while iterating over it can skip entries.
        for start, _ in list(self.memory._backers):
            if start != exec_seg_addr:
                self.memory.remove_backer(start)

        # memory_backer defaults to None; treat that as "no extra memory".
        for start, data in sorted((self.memory_backer or {}).items()):
            existing_seg = self.find_segment_containing(start)
            if existing_seg is None:  # this is the text or data segment
                new_seg = FakeSegment(start, len(data))
                self.segments.append(new_seg)

            # The executable segment keeps its original backer.
            if start == exec_seg_addr:
                continue

            if start in self.memory:
                raise ValueError("IF THIS GETS THROWN I'M GONNA JUMP OUT THE WINDOW")

            self.memory.add_backer(start, data)

        if self.register_backer is not None and "eip" in self.register_backer:
            self._entry = self.register_backer["eip"]

    @staticmethod
    def is_compatible(stream):
        return False  # Don't use this for anything unless it's manual

    @property
    def threads(self):
        """
        A single thread, named ``0``.
        """
        return [0]

    def thread_registers(self, thread=None):
        """
        Return the (name, value) items of the register backer; empty when no register backer was given.
        """
        if self.register_backer is None:
            return {}.items()
        return self.register_backer.items()


register_backend("backedcgc", BackedCGC)

View File

@@ -0,0 +1,42 @@
from ...address_translator import AT
from .. import register_backend
from ..elf import ELF
from ...patched_stream import PatchedStream
# The 16 identification bytes of an ordinary ELF file and of a CGC file. CGC.__init__ presents the former to
# the ELF parser and afterwards stores the latter back into the loaded memory image.
ELF_HEADER = bytes.fromhex("7f454c46010101000000000000000000")
CGC_HEADER = bytes.fromhex("7f43474301010143014d6572696e6f00")


class CGC(ELF):
    """
    Backend to support the CGC elf format used by the Cyber Grand Challenge competition.

    See : https://github.com/CyberGrandChallenge/libcgcef/blob/master/cgc_executable_format.md
    """

    is_default = True  # Tell CLE to automatically consider using the CGC backend
    supported_filetypes = ["cgc"]

    def __init__(self, binary, binary_stream, *args, **kwargs):
        """
        Load a CGC binary by disguising its header as ELF for the parent loader.

        :param binary:        Forwarded unchanged to the ELF backend.
        :param binary_stream: Stream of the CGC file; wrapped so that offset 0 reads as ``ELF_HEADER``.
        """
        disguised_stream = PatchedStream(binary_stream, [(0, ELF_HEADER)])
        super().__init__(binary, disguised_stream, *args, **kwargs)

        # repair the CGC header
        header_rva = AT.from_raw(0, self).to_rva()
        self.memory.store(header_rva, CGC_HEADER)

        self.os = "cgc"
        self.execstack = True  # the stack is always executable in CGC

    @staticmethod
    def is_compatible(stream):
        """Return True when the stream begins with the CGC magic; the stream is rewound afterwards."""
        stream.seek(0)
        magic = stream.read(4)
        stream.seek(0)
        return magic.startswith(b"\x7fCGC")

    def _load_segment(self, seg):
        """Load ``seg`` through the ELF backend unless it occupies no memory."""
        if not seg.header.p_memsz > 0:
            return
        super()._load_segment(seg)


register_backend("cgc", CGC)

View File

@@ -0,0 +1,3 @@
from .elf import ELF
from .metaelf import MetaELF
from .elfcore import ELFCore

View File

@@ -0,0 +1,34 @@
import os.path
from typing import Dict, List
from cle.address_translator import AT
from .variable import Variable
from .subprogram import Subprogram
class CompilationUnit:
    """
    CompilationUnit for DWARF

    See http://dwarfstd.org/doc/DWARF5.pdf page 60
    """

    def __init__(self, name, comp_dir, low_pc, high_pc, language, elf_object):
        """
        :param name:       Name of the unit; joined onto ``comp_dir`` to form ``file_path``.
        :param comp_dir:   Compilation directory.
        :param low_pc:     Lower bound of the unit, interpreted as an RVA by ``min_addr``.
        :param high_pc:    Upper bound of the unit, interpreted as an RVA by ``max_addr``.
        :param language:   Source language of the unit.
        :param elf_object: Object the addresses belong to; used for address translation.
        """
        self._elf_object = elf_object

        self.name, self.comp_dir = name, comp_dir
        self.file_path = os.path.join(comp_dir, name)
        self.low_pc, self.high_pc = low_pc, high_pc
        self.language = language

        # filled in by whoever parses the debug information
        self.functions: Dict[int, Subprogram] = {}
        self.global_variables: List[Variable] = []

    def _as_mva(self, rva):
        # translate an address relative to the owning object into its mapped address
        return AT.from_rva(rva, self._elf_object).to_mva()

    @property
    def min_addr(self):
        """``low_pc`` translated to a mapped virtual address."""
        return self._as_mva(self.low_pc)

    @property
    def max_addr(self):
        """``high_pc`` translated to a mapped virtual address."""
        return self._as_mva(self.high_pc)

View File

@@ -0,0 +1,640 @@
import os
import struct
import elftools
import logging
from collections import defaultdict
from .elf import ELF
from ..blob import Blob
from ..region import Segment
from .. import register_backend
from ...errors import CLEError, CLECompatibilityError
from ...memory import Clemory
from ...address_translator import AT
l = logging.getLogger(name=__name__)
# TODO: yall know struct.unpack_from exists, right? maybe even bitstream?
class ELFCore(ELF):
"""
Loader class for ELF core files.
One key pain point when analyzing a core dump generated on a remote machine is that the paths to binaries are
absolute (and may not exist or be the same on your local machine).
Therefore, you can use the options ``remote_file_mapping`` to specify a ``dict`` mapping (easy if there are a small
number of mappings) or ``remote_file_mapper`` to specify a function that accepts a remote file name and returns the
local file name (useful if there are many mappings).
If you specify both ``remote_file_mapping`` and ``remote_file_mapper``, ``remote_file_mapping`` is applied first,
then the result is passed to ``remote_file_mapper``.
:param executable: Optional path to the main binary of the core dump. If not supplied, ELFCore will
attempt to figure it out automatically from the core dump.
:param remote_file_mapping: Optional dict that maps specific file names in the core dump to other file names.
:param remote_file_mapper: Optional function that is used to map every file name in the core dump to whatever is
returned from this function.
"""
is_default = True # Tell CLE to automatically consider using the ELFCore backend
def __init__(self, *args, executable=None, remote_file_mapping=None, remote_file_mapper=None, **kwargs):
    """
    Load the core file, parse its notes and reload the objects it references.

    :param executable:          Optional path of the main binary, kept as ``_main_filepath``.
    :param remote_file_mapping: Optional dict translating file names found in the core.
    :param remote_file_mapper:  Optional callable translating file names found in the core; it is applied to
                                the result of ``remote_file_mapping``.
    """
    super().__init__(*args, **kwargs)

    self.filename_lookup = []
    self.__current_thread = None
    self._threads = []
    self.auxv = {}
    self.pr_fname = None
    self._main_filepath = executable
    self._page_size = 0x1000  # a default page size, will be changed later by parsing notes
    self._main_object = None

    def translate(remote_name):
        # dict lookup first (unknown names pass through), then the user callable
        local_name = remote_name
        if remote_file_mapping is not None:
            local_name = remote_file_mapping.get(local_name, local_name)
        if remote_file_mapper is not None:
            local_name = remote_file_mapper(local_name)
        return local_name

    self._remote_file_mapper = translate

    self.__extract_note_info()
    self.__reload_children()

    # the translator is only needed while parsing; drop it together with the user objects it references
    self._remote_file_mapper = None
@staticmethod
def is_compatible(stream):
    """
    Tell whether ``stream`` holds an ELF file whose header type is ``ET_CORE``.

    The stream is rewound before the ELF header is parsed.
    """
    stream.seek(0)
    leading_bytes = stream.read(0x1000)
    stream.seek(0)
    if not leading_bytes.startswith(b"\x7fELF"):
        return False
    return elftools.elf.elffile.ELFFile(stream).header["e_type"] == "ET_CORE"
def __cycle_thread(self):
    """Archive the thread record being filled in (if any) and start an empty one."""
    finished = self.__current_thread
    if finished is not None:
        self._threads.append(finished)
    self.__current_thread = {}
@property
def threads(self):
    """Identifiers of the recorded threads: their positions in the internal thread list."""
    return [index for index, _ in enumerate(self._threads)]
def thread_registers(self, thread=None):
    """
    Return the register mapping recorded for a thread.

    :param thread: Index into the recorded threads; ``None`` selects thread 0.
    """
    index = 0 if thread is None else thread
    return self._threads[index]["registers"]
def __extract_note_info(self):
    """
    All meaningful information about the process's state at crashtime is stored in the note segment.

    Walks every PT_NOTE segment, dispatches each note to its parser, then makes sure X86 threads have a
    ``"segments"`` entry, falling back to heuristics when the core carries no TLS notes.
    """

    def descriptor_bytes(note):
        # descriptors may arrive as str or bytes; the parsers below want bytes
        payload = note.n_desc
        return payload.encode("latin-1") if isinstance(payload, str) else payload

    for segment in self._reader.iter_segments():
        if segment.header.p_type != "PT_NOTE":
            continue
        for note in segment.iter_notes():
            if note.n_type == "NT_PRSTATUS":
                # every NT_PRSTATUS note opens a new thread record
                self.__cycle_thread()
                self.__parse_prstatus(descriptor_bytes(note))
            elif note.n_type == "NT_PRPSINFO":
                self.__parse_prpsinfo(note.n_desc)
            elif note.n_type == "NT_AUXV":
                self.__parse_auxv(descriptor_bytes(note))
            elif note.n_type == "NT_FILE":
                self.__parse_files(note.n_desc)
            elif note.n_type == 512 and self.arch.name == "X86":
                self.__parse_x86_tls(descriptor_bytes(note))

    self._replace_main_object_path()
    # flush the last thread record
    self.__cycle_thread()

    if not self._threads:
        l.warning("Could not find thread info, cannot initialize registers")
        return
    if self.arch.name != "X86" or "segments" in self._threads[0]:
        return

    if "AT_RANDOM" not in self.auxv:
        l.warning("This core dump does not contain TLS or auxv information. TLS information will be wrong.")
        for thread in self._threads:
            thread["segments"] = {thread["registers"]["gs"] >> 3: (0, 0xFFFFFFFF, 0x51)}
        return

    l.warning(
        "This core dump does not contain TLS information. threads will be matched to TLS regions via heuristics"
    )
    pointer_rand = self.auxv["AT_RANDOM"][4:8]
    all_locations = []
    for addr in self.__dummy_clemory.find(pointer_rand):
        candidate = addr - 0x18
        # keep only hits whose word 0x18 bytes earlier holds its own address
        if self.__dummy_clemory.unpack_word(candidate) == candidate:
            all_locations.append(candidate)

    # the heuristic is that generally threads are allocated with descending tls addresses
    for thread, loc in zip(self._threads, reversed(all_locations)):
        thread["segments"] = {thread["registers"]["gs"] >> 3: (loc, 0xFFFFF, 0x51)}
def _replace_main_object_path(self):
    """
    Point the NT_FILE entries of the main binary at the user-supplied executable path.

    Does nothing when no executable path was given or no file mappings were parsed.

    :raises CLEError: If no mapped file name starts with ``pr_fname``.
    """
    if not (self._main_filepath and self.filename_lookup):
        return

    # identify the original path and assuming pr_fname always exists
    for _start, _end, _offset, candidate in self.filename_lookup:
        # pr_fname is defined to be the first 16 bytes of the executable name
        if os.path.basename(candidate).startswith(self.pr_fname):
            original_path = candidate
            break
    else:
        raise CLEError("Fail to find the main object, is this core dump malformed?")

    # replace the path
    for position, (vm_start, vm_end, offset, path) in enumerate(self.filename_lookup):
        if path != original_path:
            continue
        self.filename_lookup[position] = (vm_start, vm_end, offset, self._main_filepath)
@property
def __dummy_clemory(self):
    """A fresh root ``Clemory`` exposing this object's memory at ``linked_base``; rebuilt on every access."""
    view = Clemory(self.arch, root=True)
    view.add_backer(self.linked_base, self.memory)
    return view
def __parse_prstatus(self, desc):
    """
    Parse out the prstatus, accumulating the general purpose register values.

    Supports AMD64, X86, ARM, AArch64, MIPS and MIPSEL at the moment.

    The decoded fields (signal information, process ids, accumulated times, ``"registers"`` and
    ``pr_fpvalid``) are merged into the thread record currently being filled in.

    :param desc: Raw bytes of the descriptor of a note of type NT_PRSTATUS.
    :raises CLEError: If the architecture's word size is neither 4 nor 8 bytes.
    :raises CLECompatibilityError: If no register layout is known for the architecture.
    """

    # TODO: support all architectures angr supports

    arch_bytes = self.arch.bytes
    if arch_bytes == 4:
        fmt = "I"
    elif arch_bytes == 8:
        fmt = "Q"
    else:
        raise CLEError("Architecture must have a bitwidth of either 64 or 32")

    end = ">" if self.arch.memory_endness == "Iend_BE" else "<"

    # Read cursor into desc. The helpers below look `pos` up when they are called and return the
    # advanced cursor as the first element of their result, so every call site re-assigns `pos`.
    pos = 0

    def read_longs(n):
        # n architecture-sized words
        fin = pos + n * arch_bytes
        return (fin, *struct.unpack(end + fmt * n, desc[pos:fin]))

    def read_ints(n):
        # n 4-byte unsigned integers
        fin = pos + n * 4
        return (fin, *struct.unpack(end + "I" * n, desc[pos:fin]))

    def read_timeval():
        # a (seconds, microseconds) pair of architecture-sized words, folded into microseconds
        sec, usec = struct.unpack(end + fmt * 2, desc[pos : pos + 2 * arch_bytes])
        return (pos + 2 * arch_bytes, sec * 1000000 + usec)

    result = {}

    pos, result["si_signo"], result["si_code"], result["si_errno"] = read_ints(3)

    # this field is a short, but it's padded to an int
    (result["pr_cursig"],) = struct.unpack(end + "H", desc[pos : pos + 2])
    pos += 4

    pos, result["pr_sigpend"], result["pr_sighold"] = read_longs(2)
    pos, result["pr_pid"], result["pr_ppid"], result["pr_pgrp"], result["pr_sid"] = read_ints(4)
    pos, result["pr_utime_usec"] = read_timeval()
    pos, result["pr_stime_usec"] = read_timeval()
    pos, result["pr_cutime_usec"] = read_timeval()
    pos, result["pr_cstime_usec"] = read_timeval()

    # parse out general purpose registers
    # "xxx" marks a slot that is read from the dump but not kept (it is deleted from the result below)
    if self.arch.name == "AMD64":
        # register names as they appear in dump
        rnames = [
            "r15",
            "r14",
            "r13",
            "r12",
            "rbp",
            "rbx",
            "r11",
            "r10",
            "r9",
            "r8",
            "rax",
            "rcx",
            "rdx",
            "rsi",
            "rdi",
            "xxx",
            "rip",
            "cs",
            "eflags",
            "rsp",
            "ss",
            "fs_base",
            "gs_base",
            "ds",
            "es",
            "xxx",
            "xxx",
        ]
        nreg = 27
    elif self.arch.name == "X86":
        rnames = [
            "ebx",
            "ecx",
            "edx",
            "esi",
            "edi",
            "ebp",
            "eax",
            "ds",
            "es",
            "fs",
            "gs",
            "xxx",
            "eip",
            "cs",
            "eflags",
            "esp",
            "ss",
        ]
        nreg = 17
    elif self.arch.name == "ARMHF" or self.arch.name == "ARMEL":
        rnames = [
            "r0",
            "r1",
            "r2",
            "r3",
            "r4",
            "r5",
            "r6",
            "r7",
            "r8",
            "r9",
            "r10",
            "r11",
            "r12",
            "r13",
            "r14",
            "r15",
            "xxx",
            "xxx",
        ]
        nreg = 18
    elif self.arch.name == "AARCH64":
        rnames = ["x%d" % i for i in range(32)]
        rnames.append("pc")
        rnames.append("xxx")
        nreg = 34
    elif self.arch.name == "MIPS32":
        rnames = [
            "xxx",
            "xxx",
            "xxx",
            "xxx",
            "xxx",
            "xxx",
            "zero",
            "at",
            "v0",
            "v1",
            "a0",
            "a1",
            "a2",
            "a3",
            "t0",
            "t1",
            "t2",
            "t3",
            "t4",
            "t5",
            "t6",
            "t7",
            "s0",
            "s1",
            "s2",
            "s3",
            "s4",
            "s5",
            "s6",
            "s7",
            "t8",
            "t9",
            "k0",
            "k1",
            "gp",
            "sp",
            "s8",
            "ra",
            "lo",
            "hi",
            "pc",
            "bad",
            "sr",
            "status",
            "cause",
        ]
        nreg = 45
    else:
        raise CLECompatibilityError("Architecture '%s' unsupported by ELFCore" % self.arch.name)

    # guards against a register table being edited without updating its count
    assert nreg == len(rnames), "Please create an issue with this core-file attached to get this fixed."

    pos, *regvals = read_longs(nreg)
    result["registers"] = dict(zip(rnames, regvals))
    # every layout above contains at least one "xxx" placeholder, so the key always exists here
    del result["registers"]["xxx"]

    pos, result["pr_fpvalid"] = read_ints(1)
    # the descriptor must end here, allowing for less than one word of trailing bytes
    assert (
        pos <= len(desc) < pos + arch_bytes
    ), "Please create an issue with this core-file attached to get this fixed."

    self.__current_thread.update(result)
def __parse_prpsinfo(self, desc):
    """
    Record the process name from a parsed NT_PRPSINFO descriptor in ``self.pr_fname``.

    Everything from the first NUL byte on is discarded. A name that cannot be decoded is stored as the
    ``repr`` of its bytes instead.
    """
    raw_name = desc.pr_fname.partition(b"\x00")[0]
    try:
        decoded = raw_name.decode()
    except UnicodeDecodeError:
        decoded = repr(raw_name)
    self.pr_fname = decoded
def __parse_files(self, desc):
    """
    Record the page size and the file mappings of a parsed NT_FILE descriptor.

    ``self.filename_lookup`` becomes a list of ``(vm_start, vm_end, byte offset, local file name)`` tuples,
    where the byte offset is the entry's page offset multiplied by the page size and the file name has been
    passed through the remote file mapper.
    """
    page_size = desc.page_size
    self._page_size = page_size

    mappings = []
    for entry, raw_name in zip(desc.Elf_Nt_File_Entry, desc.filename):
        local_name = self._remote_file_mapper(raw_name.decode())
        mappings.append((entry.vm_start, entry.vm_end, entry.page_offset * page_size, local_name))
    self.filename_lookup = mappings
def __parse_x86_tls(self, desc):
    """
    Parse the X86 TLS note of the current thread.

    The descriptor is read as consecutive records of four 32-bit unsigned integers
    ``(index, base, limit, flags)``; each one is stored as ``segments[index] = (base, limit, flags)``
    on the thread record being filled in.

    :param desc: Raw bytes of the note descriptor.
    """
    # This note is only dispatched for X86 cores, whose data is little-endian. Spell the byte order out
    # instead of relying on the host's native order, so the core parses the same on a big-endian host.
    record = struct.Struct("<4I")
    segments = {}
    self.__current_thread["segments"] = segments
    for offset in range(0, len(desc), record.size):
        index, base, limit, flags = record.unpack_from(desc, offset)
        segments[index] = (base, limit, flags)
def __parse_auxv(self, desc):
    """
    Decode the auxiliary vector note into ``self.auxv``.

    The descriptor is a sequence of ``(code, value)`` word pairs. Codes found in ``auxv_codes`` are stored
    under their name, others under their number. For ``AT_RANDOM`` the value is replaced by the 16 bytes it
    points at; for ``AT_EXECFN`` and ``AT_PLATFORM`` by the NUL-terminated byte string it points at.

    :param desc: Raw bytes of the note descriptor.
    """
    word_size = self.arch.bytes
    for entry_start in range(0, len(desc), word_size * 2):
        code = struct.unpack_from(self.arch.struct_fmt(), desc, entry_start)[0]
        value = struct.unpack_from(self.arch.struct_fmt(), desc, entry_start + word_size)[0]
        key = auxv_codes.get(code, code)

        if key == "AT_RANDOM":
            value = self.__dummy_clemory.load(value, 0x10)
        elif key in ("AT_EXECFN", "AT_PLATFORM"):
            # follow the pointer and collect bytes up to (not including) the terminating NUL
            cursor = value
            collected = bytearray()
            byte = self.__dummy_clemory[cursor]
            while byte != 0:
                collected.append(byte)
                cursor += 1
                byte = self.__dummy_clemory[cursor]
            value = bytes(collected)

        self.auxv[key] = value
def __reload_children(self):
    """
    Load the files referenced by the core as child objects and overlay the core's memory onto them.

    For every file named in ``self.filename_lookup`` whose mapped range can be read from the core, the file
    is loaded in isolation, a base address is worked out for it, and the core segments overlapping it are
    copied into the child's memory. Core segments left over afterwards become ``Blob`` children.

    If a file cannot be opened or loaded, or no base address can be determined for it, all children are
    discarded and the method returns early.
    """
    self.loader.page_size = self._page_size
    self.loader._perform_relocations = False

    # hack: we are using a loader internal method in a non-kosher way which will cause our children to be
    # marked as the main binary if we are also the main binary
    # work around this by setting ourself here:
    if self.loader.main_object is None:
        self.loader.main_object = self

    # file name -> list of (vm_start, file offset, bytes found in the core for that mapping)
    child_patches = defaultdict(list)
    for vm_start, vm_end, offset, filename in self.filename_lookup:
        try:
            patch_data = self.__dummy_clemory.load(vm_start, vm_end - vm_start)
        except KeyError:
            # the core does not contain this range; nothing to patch with
            pass
        else:
            child_patches[filename].append((vm_start, offset, patch_data))

    # core segments not yet claimed by any child object
    remaining_segments = list(self.segments)

    for filename, patches in child_patches.items():
        try:
            with open(filename, "rb") as fp:
                obj = self.loader._load_object_isolated(fp)
        except (FileNotFoundError, CLECompatibilityError) as ex:
            if isinstance(ex, FileNotFoundError):
                l.warning(
                    "Dependency %s does not exist on the current system; this core may be incomplete.", filename
                )
            elif isinstance(ex, CLECompatibilityError):
                l.warning("Could not find a compatible loader for %s; this core may be incomplete.", filename)
            else:
                l.warning("Could not load %s; this core may be incomplete.", filename)

            # give up on children altogether and undo the main_object workaround from above
            if self.loader.main_object is self:
                self.loader.main_object = None
            self.child_objects.clear()
            return

        # several ways to try to match the NT_FILE entries to the object
        # (not trivial because offsets can be mapped multiple places)
        # (and because there's no clear pattern for how mappings are included or omitted)
        base_addr = None

        # try one: use the delta between each allocation as a signature (works when the text segment is missing)
        if base_addr is None:
            vm_starts = [a for a, _, _ in patches]
            vm_deltas = [b - a for a, b in zip(vm_starts, vm_starts[1:])]
            segment_starts = [seg.vaddr for seg in obj.segments]
            segment_deltas = [b - a for a, b in zip(segment_starts, segment_starts[1:])]

            # funky lil algorithm to find substrings
            for match_idx in range(len(segment_deltas) - len(vm_deltas) + 1):
                for idx, vm_delta in enumerate(vm_deltas):
                    if vm_delta != segment_deltas[match_idx + idx]:
                        break
                else:
                    # every delta matched starting at segment match_idx
                    base_addr = vm_starts[0] - AT.from_lva(obj.segments[match_idx].vaddr, obj).to_rva()
                    break

        # try two: if the file is identity-mapped, it's easy (?)
        if base_addr is None:
            base_reccomendations = [a - b for a, b, _ in patches]
            if all(a == base_reccomendations[0] for a in base_reccomendations):
                base_addr = base_reccomendations[0]

        # try three: if we have the zero offset then it's easy (?)
        if base_addr is None:
            if patches[0][1] == 0:
                base_addr = patches[0][0]

        if base_addr is None:
            l.warning("Could not load %s (could not determine base); core may be incomplete", filename)
            if self.loader.main_object is self:
                self.loader.main_object = None
            self.child_objects.clear()
            return

        obj._custom_base_addr = base_addr
        self.child_objects.append(obj)

        # figure out how the core's data should affect the child object's data
        # iterate over all the core segments, since the only time we will need to make a change to the child's memory is if the core has something to say about it
        # if there is ANY OVERLAP AT ALL, copy over the relevant data and nuke the segment
        # then, if there is any part of the segment which DOESN'T correspond to a child segment, inject a new memory backer into the child for the relevant data
        max_addr = base_addr + (obj.max_addr - obj.min_addr)
        i = 0
        while i < len(remaining_segments):
            seg = remaining_segments[i]

            # check for overlap (overapproximation)
            if base_addr <= seg.vaddr <= max_addr or seg.vaddr <= base_addr < seg.vaddr + seg.memsize:
                remaining_segments.pop(i)

                # if there is data before the beginning of the child or after the end, make new artificial segments for it
                # (each insert is followed by i += 1 so the new piece is stepped over, not re-examined)
                if seg.vaddr < base_addr:
                    size = base_addr - seg.vaddr
                    remaining_segments.insert(i, Segment(seg.offset, seg.vaddr, size, size))
                    i += 1
                if seg.max_addr > max_addr:
                    size = seg.max_addr - max_addr
                    offset = seg.memsize - size
                    remaining_segments.insert(i, Segment(seg.offset + offset, seg.vaddr + offset, size, size))
                    i += 1

                # ohhhh this is SUCH a confusing address space-conversation problem!
                # we're going to enumerate the contents of the core segment. at each point we find the relevant child backer. if this skips any content, inject a backer into the child.
                # then, copy the contents of the core segment that overlaps the child backer.
                # `cursor` is an offset into the core segment; `child_cursor` is the same spot relative to the child's base
                cursor = max(0, base_addr - seg.vaddr)
                while (
                    cursor < seg.filesize
                ):  # use filesize and not memsize so we don't overwrite stuff with zeroes if it's omitted from the core
                    child_cursor = cursor + seg.vaddr - base_addr
                    try:
                        child_offset, child_backer = next(obj.memory.backers(child_cursor))
                    except StopIteration:
                        # is this right? is there any behavior we need to account for in the case that there is somehow no backer past a point mapped by the core?
                        break

                    # have we skipped any part of the core?
                    skip_size = child_offset - child_cursor
                    if skip_size > 0:
                        # inject it into the child
                        obj.memory.add_backer(
                            child_cursor,
                            self.memory.load(AT.from_mva(cursor + seg.vaddr, self).to_rva(), skip_size),
                        )

                    # how much of the child's segment have we skipped by starting at the beginning of the core segment?
                    child_backer_offset = max(0, -skip_size)
                    # how much of the core's segment have we skipped and handled via injection?
                    core_backer_offset = max(0, skip_size)
                    # how much can we copy?
                    copy_size = min(
                        len(child_backer) - child_backer_offset, seg.memsize - (cursor + core_backer_offset)
                    )
                    if copy_size > 0:
                        # do the copy if we have anything to copy
                        obj.memory.store(
                            child_offset + child_backer_offset,
                            self.memory.load(
                                AT.from_mva(seg.vaddr + cursor + core_backer_offset, self).to_rva(), copy_size
                            ),
                        )

                    # advance cursor
                    cursor += core_backer_offset + copy_size
            else:
                i += 1

    # for all remaining segments, make blobs out of them
    mem = self.__dummy_clemory
    for seg in remaining_segments:
        if not seg.memsize:
            continue
        obj = Blob(
            self.binary,
            mem,
            segments=[(seg.vaddr, seg.vaddr, seg.memsize)],
            base_addr=seg.vaddr,
            arch=self.arch,
            entry_point=0,
            force_rebase=True,
        )
        self.child_objects.append(obj)

    # NOTE(review): presumably this marks the core itself as occupying no address space now that its
    # content lives in the children -- confirm against how the loader consumes these attributes
    self.mapped_base = 0
    self._max_addr = 0
    self.has_memory = False

    if self.loader.main_object is self:
        self.loader.main_object = None
        self.__record_main_object()
def __record_main_object(self):
    """
    If children objects are reloaded, identify the main object for later use by loader

    The first child whose file name starts with ``pr_fname``, or equals the base name of the user-supplied
    executable path, is recorded in ``_main_object``. When no child matches, a warning is logged and the
    core itself is recorded.
    """
    for child in self.child_objects:
        named_like_process = self.pr_fname and child.binary_basename.startswith(self.pr_fname)
        if named_like_process or (
            self._main_filepath is not None and os.path.basename(self._main_filepath) == child.binary_basename
        ):
            self._main_object = child
            return

    l.warning("Failed to identify main object in ELFCore")
    self._main_object = self
# Numeric auxiliary-vector entry types mapped to their AT_* names. ELFCore looks codes up with
# ``auxv_codes.get(code, code)``, so a code missing from this table is kept as its number.
auxv_codes = {
    0x0: "AT_NULL",
    0x1: "AT_IGNORE",
    0x2: "AT_EXECFD",
    0x3: "AT_PHDR",
    0x4: "AT_PHENT",
    0x5: "AT_PHNUM",
    0x6: "AT_PAGESZ",
    0x7: "AT_BASE",
    0x8: "AT_FLAGS",
    0x9: "AT_ENTRY",
    0xA: "AT_NOTELF",
    0xB: "AT_UID",
    0xC: "AT_EUID",
    0xD: "AT_GID",
    0xE: "AT_EGID",
    0x11: "AT_CLKTCK",
    0xF: "AT_PLATFORM",
    0x10: "AT_HWCAP",
    0x12: "AT_FPUCW",
    0x13: "AT_DCACHEBSIZE",
    0x14: "AT_ICACHEBSIZE",
    0x15: "AT_UCACHEBSIZE",
    0x16: "AT_IGNOREPPC",
    0x17: "AT_SECURE",
    0x18: "AT_BASE_PLATFORM",
    0x19: "AT_RANDOM",
    0x1A: "AT_HWCAP2",
    0x1F: "AT_EXECFN",
    0x20: "AT_SYSINFO",
    0x21: "AT_SYSINFO_EHDR",
    0x22: "AT_L1I_CACHESHAPE",
    0x23: "AT_L1D_CACHESHAPE",
    0x24: "AT_L2_CACHESHAPE",
    0x25: "AT_L3_CACHESHAPE",
    0x28: "AT_L1I_CACHESIZE",
    0x29: "AT_L1I_CACHEGEOMETRY",
    0x2A: "AT_L1D_CACHESIZE",
    0x2B: "AT_L1D_CACHEGEOMETRY",
    0x2C: "AT_L2_CACHESIZE",
    0x2D: "AT_L2_CACHEGEOMETRY",
    0x2E: "AT_L3_CACHESIZE",
    0x2F: "AT_L3_CACHEGEOMETRY",
}

# make this backend available to the loader under the name "elfcore"
register_backend("elfcore", ELFCore)

View File

@@ -0,0 +1,118 @@
import struct
class ELFHashTable:
    """
    Functions to do lookup from a HASH section of an ELF file.

    Information: http://docs.oracle.com/cd/E23824_01/html/819-0690/chapter6-48031.html
    """

    def __init__(self, symtab, stream, offset, arch):
        """
        :param symtab:  The symbol table to perform lookups from (as a pyelftools SymbolTableSection).
        :param stream:  A file-like object to read from the ELF's memory.
        :param offset:  The offset in the object where the table starts.
        :param arch:    The ArchInfo object for the ELF file.
        """
        self.symtab = symtab
        fmt = "<" if arch.memory_endness == "Iend_LE" else ">"
        stream.seek(offset)
        # header: bucket count and chain count, followed by the two arrays of 32-bit words
        self.nbuckets, self.nchains = struct.unpack(fmt + "II", stream.read(8))
        self.buckets = struct.unpack(fmt + "I" * self.nbuckets, stream.read(4 * self.nbuckets))
        self.chains = struct.unpack(fmt + "I" * self.nchains, stream.read(4 * self.nchains))

    def get(self, k):
        """
        Perform a lookup.

        :param k:   The string to look up.
        :return:    A ``(symbol index, pyelftools Symbol)`` pair, or ``(None, None)`` if there is no match.
        """
        if self.nbuckets == 0:
            return None, None
        hval = self.elf_hash(k) % self.nbuckets
        symndx = self.buckets[hval]
        # follow the chain of the bucket; index 0 terminates it
        while symndx != 0:
            sym = self.symtab.get_symbol(symndx)
            if sym.name == k:
                return symndx, sym
            symndx = self.chains[symndx]
        return None, None

    # from http://www.partow.net/programming/hashfunctions/
    @staticmethod
    def elf_hash(key):
        """
        Compute the classic ELF symbol hash of ``key``.

        The hash is defined over the bytes of the name, so a ``str`` is hashed through its UTF-8 encoding
        (identical to the per-character result for ASCII names); ``bytes`` and ``bytearray`` are accepted
        directly.

        :param key: The symbol name.
        :return:    The hash value, always below ``2**28``.
        """
        data = key.encode("utf-8") if isinstance(key, str) else bytes(key)
        h = 0
        for byte in data:
            # the reference algorithm works on 32-bit words; discard any carry beyond bit 31
            h = ((h << 4) + byte) & 0xFFFFFFFF
            x = h & 0xF0000000
            if x != 0:
                h ^= x >> 24
            h &= ~x
        return h
class GNUHashTable:
    """
    Functions to do lookup from a GNU_HASH section of an ELF file.

    Information: https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
    """

    def __init__(self, symtab, stream, offset, arch):
        """
        :param symtab:  The symbol table to perform lookups from (as a pyelftools SymbolTableSection).
        :param stream:  A file-like object to read from the ELF's memory.
        :param offset:  The offset in the object where the table starts.
        :param arch:    The ArchInfo object for the ELF file.
        """
        self.symtab = symtab
        fmt = "<" if arch.memory_endness == "Iend_LE" else ">"
        # remembered because the hash chain is read lazily, one word at a time, during lookups
        self._endness = fmt
        self.c = arch.bits
        fmtsz = "I" if self.c == 32 else "Q"

        stream.seek(offset)
        data = stream.read(16)
        self.nbuckets, self.symndx, self.maskwords, self.shift2 = struct.unpack(fmt + "IIII", data)

        # bloom filter words are as wide as the architecture; buckets are always 32 bits
        self.bloom = struct.unpack(fmt + fmtsz * self.maskwords, stream.read(self.c * self.maskwords // 8))
        self.buckets = struct.unpack(fmt + "I" * self.nbuckets, stream.read(4 * self.nbuckets))
        # the chain array starts right after the buckets
        self.hash_ptr = stream.tell()
        self.stream = stream

    def _matches_bloom(self, H1):
        """Return True unless the bloom filter rules out a symbol with hash ``H1``."""
        C = self.c
        H2 = H1 >> self.shift2
        N = (H1 // C) & (self.maskwords - 1)
        BITMASK = (1 << (H1 % C)) | (1 << (H2 % C))
        return (self.bloom[N] & BITMASK) == BITMASK

    def get(self, k):
        """
        Perform a lookup.

        :param k:   The string to look up
        :return:    A ``(symbol index, pyelftools Symbol)`` pair, or ``(None, None)`` if there is no match.
        """
        # an empty table cannot contain anything (and would divide by zero below)
        if self.nbuckets == 0:
            return None, None

        h = self.gnu_hash(k)
        if not self._matches_bloom(h):
            return None, None
        n = self.buckets[h % self.nbuckets]
        if n == 0:
            return None, None
        while True:
            sym = self.symtab.get_symbol(n)
            if sym.name == k:
                return n, sym
            # The lowest bit of a chain word marks the last symbol of the bucket. The word must be decoded
            # with the byte order of the file, not of the host, or the wrong bit is tested.
            self.stream.seek(self.hash_ptr + 4 * (n - self.symndx))
            (chain_word,) = struct.unpack(self._endness + "I", self.stream.read(4))
            if chain_word & 1 == 1:
                break
            n += 1
        return None, None

    @staticmethod
    def gnu_hash(key):
        """
        Compute the GNU symbol hash of ``key``.

        The hash is defined over the bytes of the name, so a ``str`` is hashed through its UTF-8 encoding
        (identical to the per-character result for ASCII names); ``bytes`` and ``bytearray`` are accepted
        directly.

        :param key: The symbol name.
        :return:    The 32-bit hash value.
        """
        data = key.encode("utf-8") if isinstance(key, str) else bytes(key)
        h = 5381
        for byte in data:
            h = h * 33 + byte
        return h & 0xFFFFFFFF

View File

@@ -0,0 +1,172 @@
"""
References:
- http://www.hexblog.com/wp-content/uploads/2012/06/Recon-2012-Skochinsky-Compiler-Internals.pdf
- https://www.airs.com/blog/archives/460
- https://www.airs.com/blog/archives/464
"""
from typing import List # pylint:disable=unused-import
from elftools.common.utils import struct_parse
from elftools.dwarf.enums import DW_EH_encoding_flags
from elftools.dwarf.structs import DWARFStructs, Struct
class ExceptionTableHeader:
    """
    Header of an LSDA exception table.

    Fields: ``lp_start`` (landing pad start offset), ``ttype_encoding`` (encoding of pointers in the type
    table), ``ttype_offset`` (type table offset), ``call_site_encoding`` (encoding of items in the call site
    table) and ``call_site_table_len`` (total length of the call site table).
    """

    __slots__ = (
        "lp_start",
        "ttype_encoding",
        "ttype_offset",
        "call_site_encoding",
        "call_site_table_len",
    )

    def __init__(self, lp_start, ttype_encoding, ttype_offset, call_site_encoding, call_site_table_len):
        self.lp_start = lp_start
        self.ttype_encoding, self.ttype_offset = ttype_encoding, ttype_offset
        self.call_site_encoding, self.call_site_table_len = call_site_encoding, call_site_table_len
class CallSiteEntry:
    """One record of an LSDA call site table: ``cs_start``, ``cs_len``, ``cs_lp`` and ``cs_action``."""

    __slots__ = ("cs_start", "cs_len", "cs_lp", "cs_action")

    def __init__(self, cs_start, cs_len, cs_lp, cs_action):
        self.cs_start, self.cs_len = cs_start, cs_len
        self.cs_lp, self.cs_action = cs_lp, cs_action
class LSDAExceptionTable:
    """
    LSDA exception table parser.

    TODO: Much of this class should be eventually moved to pyelftools.
    """

    def __init__(self, stream, bits, little_endian=True):
        """
        :param stream:          File-like object the tables are read from.
        :param bits:            Word size of the target, 32 or 64.
        :param little_endian:   Byte order handed to the pyelftools struct definitions.
        :raises ValueError:     If ``bits`` is neither 32 nor 64.
        """
        if bits not in (32, 64):
            raise ValueError("Unsupported bits value %d. Expect either 32 or 64." % bits)

        self.stream = stream
        # both are filled in by parse_lsda()
        self.address = None
        self.base_offset = None

        self.entry_structs = DWARFStructs(little_endian=little_endian, dwarf_format=bits, address_size=bits // 8)
        self._formats = self._eh_encoding_to_field(self.entry_structs)

    @staticmethod
    def _eh_encoding_to_field(entry_structs):
        """
        Shamelessly copied from pyelftools since the original method is a bounded method.

        Return a mapping from basic encodings (DW_EH_encoding_flags) the
        corresponding field constructors (for instance
        entry_structs.Dwarf_uint32).
        """
        encoding_to_attribute = (
            ("DW_EH_PE_absptr", "Dwarf_target_addr"),
            ("DW_EH_PE_uleb128", "Dwarf_uleb128"),
            ("DW_EH_PE_udata2", "Dwarf_uint16"),
            ("DW_EH_PE_udata4", "Dwarf_uint32"),
            ("DW_EH_PE_udata8", "Dwarf_uint64"),
            ("DW_EH_PE_sleb128", "Dwarf_sleb128"),
            ("DW_EH_PE_sdata2", "Dwarf_int16"),
            ("DW_EH_PE_sdata4", "Dwarf_int32"),
            ("DW_EH_PE_sdata8", "Dwarf_int64"),
        )
        return {
            DW_EH_encoding_flags[encoding]: getattr(entry_structs, attribute)
            for encoding, attribute in encoding_to_attribute
        }

    def _read_value(self, constructor, label):
        # parse a single field of the given type at the current stream position
        return struct_parse(Struct("dummy", constructor(label)), self.stream)[label]

    def parse_lsda(self, address, offset):
        """
        Parse the LSDA located at ``offset`` in the stream.

        :param address: Address of the LSDA; used to resolve pc-relative header values.
        :param offset:  Stream offset at which the LSDA starts.
        :return:        The list of ``CallSiteEntry`` records of its call site table.
        """
        self.address, self.base_offset = address, offset
        self.stream.seek(offset)

        header = self._parse_lsda_header()

        entries = []
        table_start = self.stream.tell()
        # keep reading records until the advertised table length has been consumed
        while self.stream.tell() - table_start < header.call_site_table_len:
            entry = self._parse_call_site_entry(header.call_site_encoding)
            if entry is not None:
                entries.append(entry)
        return entries

    def _parse_lsda_header(self):
        """
        Read the LSDA header at the current stream position.

        :raises NotImplementedError: For a landing pad start modifier other than none or pc-relative.
        """
        omitted = DW_EH_encoding_flags["DW_EH_PE_omit"]
        uleb128 = self.entry_structs.Dwarf_uleb128

        # lpstart
        lpstart = None
        lpstart_encoding = self.stream.read(1)[0]
        if lpstart_encoding != omitted:
            lpstart = self._read_value(self._formats[lpstart_encoding & 0x0F], "LPStart")
            modifier = lpstart_encoding & 0xF0
            if modifier == DW_EH_encoding_flags["DW_EH_PE_pcrel"]:
                lpstart += self.address + (self.stream.tell() - self.base_offset)
            elif modifier != 0:
                raise NotImplementedError("Unsupported modifier %#x." % modifier)

        # ttype
        ttype_offset = None
        ttype_encoding = self.stream.read(1)[0]
        if ttype_encoding != omitted:
            ttype_offset = self._read_value(uleb128, "TType")

        # call site table length
        cstable_encoding = self.stream.read(1)[0]
        cstable_length = self._read_value(uleb128, "CSTable")

        return ExceptionTableHeader(lpstart, ttype_encoding, ttype_offset, cstable_encoding, cstable_length)

    def _parse_call_site_entry(self, encoding):
        """
        Read one call site record at the current stream position.

        :param encoding: Call site encoding byte taken from the LSDA header.
        :raises NotImplementedError: If the encoding carries any modifier bits.
        """
        field = self._formats[encoding & 0x0F]

        record = struct_parse(
            Struct(
                "CallSiteEntry",
                field("cs_start"),
                field("cs_len"),
                field("cs_lp"),
                self.entry_structs.Dwarf_uleb128("cs_action"),
            ),
            self.stream,
        )

        modifier = encoding & 0xF0
        if modifier != 0:
            raise NotImplementedError("Unsupported modifier for CallSiteEntry: %#x." % modifier)

        return CallSiteEntry(record["cs_start"], record["cs_len"], record["cs_lp"], record["cs_action"])

View File

@@ -0,0 +1,495 @@
import pyvex
import elftools
import os
import logging
from .. import Backend
from ..symbol import SymbolType
from ...address_translator import AT
from ...utils import stream_or_path
from elftools.elf.descriptions import describe_ei_osabi
from elftools.elf.dynamic import DynamicSection
from elftools.elf.enums import ENUM_DT_FLAGS
from enum import Enum
from collections import OrderedDict
__all__ = ("MetaELF",)
l = logging.getLogger(name=__name__)
class Relro(Enum):
    """RELRO classification of an ELF file, as computed by ``get_relro``."""

    NONE = 0  # no PT_GNU_RELRO segment
    PARTIAL = 1  # PT_GNU_RELRO segment present, but BIND_NOW could not be confirmed
    FULL = 2  # PT_GNU_RELRO segment present and DF_BIND_NOW set in DT_FLAGS
def maybedecode(string):
    """
    Return ``string`` as text.

    :param string: A ``str`` (returned unchanged, subclasses included) or a bytes-like object with a
                   ``decode`` method (decoded with the default codec).
    """
    # so... it turns out that pyelftools is garbage and will transparently give you either strings or bytestrings
    # based on pretty much nothing whatsoever
    # isinstance rather than an exact type check, so str subclasses are not sent down the decode path
    return string if isinstance(string, str) else string.decode()
def get_relro(elf):
    """
    Classify the RELRO protection of a parsed ELF file.

    :param elf: A pyelftools ``ELFFile``.
    :return:    A member of ``Relro``.
    """
    # The tests for partial and full RELRO have been taken from
    # checksec.sh v1.5 (https://www.trapkit.de/tools/checksec/):
    # - Partial RELRO has a 'GNU_RELRO' segment
    # - Full RELRO also has a 'BIND_NOW' flag in the dynamic section
    for segment in elf.iter_segments():
        if segment.header.p_type == "PT_GNU_RELRO":
            break
    else:
        return Relro.NONE

    dynamic = elf.get_section_by_name(".dynamic")
    # a missing section (None) fails the isinstance test as well
    if not isinstance(dynamic, DynamicSection):
        return Relro.PARTIAL

    flag_tags = [tag for tag in dynamic.iter_tags() if tag.entry.d_tag == "DT_FLAGS"]
    if len(flag_tags) != 1:
        return Relro.PARTIAL

    bind_now = ENUM_DT_FLAGS["DF_BIND_NOW"]
    if flag_tags[0].entry.d_val & bind_now == bind_now:
        return Relro.FULL
    return Relro.PARTIAL
class MetaELF(Backend):
"""
A base class that implements functions used by all backends that can load an ELF.
"""
def __init__(self, *args, **kwargs):
    """Initialise the backend, then read OS ABI, ELF flags and RELRO level from the binary stream."""
    super().__init__(*args, **kwargs)

    # a throw-away parser, used only for the header-level facts below
    probe = elftools.elf.elffile.ELFFile(self._binary_stream)
    elf_header = probe.header
    self.os = describe_ei_osabi(elf_header.e_ident.EI_OSABI)
    self.elfflags = elf_header.e_flags
    self.relro = get_relro(probe)

    # PLT bookkeeping; _plt maps a name to the RVA of its stub
    self._plt = {}
    self._ppc64_abiv1_initial_rtoc = None
    self._cached_plt = None
    self._cached_reverse_plt = None
supported_filetypes = ["elf"]
def _block(self, addr, skip_stmts=False):
    """
    Lift the 40 bytes at linked address ``addr`` into a VEX block.

    On ARM architectures an odd address is lifted as Thumb: the bytes are loaded from ``addr - 1`` and the
    lifter is told to skip the first byte.

    :param addr:        Linked virtual address of the block.
    :param skip_stmts:  Forwarded to ``pyvex.IRSB``.
    """
    # for sanity checking. we live in the world of heuristics now.
    is_thumb = self.arch.name.startswith("ARM") and addr % 2 == 1
    if is_thumb:
        load_addr, skipped_bytes = addr - 1, 1
    else:
        load_addr, skipped_bytes = addr, 0
    code = self._block_bytes(load_addr, 40)
    return pyvex.IRSB(code, addr, self.arch, bytes_offset=skipped_bytes, opt_level=1, skip_stmts=skip_stmts)
def _block_bytes(self, addr, size):
    """Load ``size`` bytes of this object's memory starting at linked address ``addr``."""
    rva = AT.from_lva(addr, self).to_rva()
    return self.memory.load(rva, size)
def _block_references_addr(self, block, addr):
    """
    Tell whether a lifted block refers to ``addr``.

    The block matches when ``addr`` appears among its constants. On X86 it also matches when it adds the
    constant ``addr - <GOT section address>`` to a temporary loaded from ``ebx``.

    :param block:   The lifted block to inspect.
    :param addr:    The address that should be referenced.
    """
    if any(const.value == addr for const in block.all_constants):
        return True
    if self.arch.name != "X86":
        return False

    # search for tX = GET(ebx) -> Add32(tX, got_addr - addr)
    if ".got.plt" in self.sections_map:
        got_sec = self.sections_map[".got.plt"]
    elif self.relro is Relro.FULL:
        got_sec = self.sections_map[".got"]
    else:
        return False

    expected_delta = addr - got_sec.vaddr
    ebx_tmp = None
    for stmt in block.statements:
        if stmt.tag != "Ist_WrTmp":
            continue
        expr = stmt.data

        # remember the most recent temporary that was loaded from ebx
        if expr.tag == "Iex_Get" and expr.offset == self.arch.registers["ebx"][0]:
            ebx_tmp = stmt.tmp

        if ebx_tmp is None or expr.tag != "Iex_Binop" or expr.op != "Iop_Add32":
            continue

        # sorting by string form puts the operands in a fixed order before they are inspected
        operands = sorted(expr.args, key=str)
        if (
            operands[0].tag == "Iex_Const"
            and operands[0].con.value == expected_delta
            and operands[1].tag == "Iex_RdTmp"
            and operands[1].tmp == ebx_tmp
        ):
            return True
    return False
def _add_plt_stub(self, name, addr, sanity_check=True):
    """
    Record ``addr`` as the PLT stub of ``name``.

    :param name:            Key into ``self.jmprel`` and ``self._plt``.
    :param addr:            Candidate stub address (an LVA); non-positive values are rejected.
    :param sanity_check:    When true, the stub is only accepted if the block lifted at ``addr`` references
                            the linked address of the relocation for ``name``.
    :return:                True if the stub was recorded, False otherwise.
    """
    # addr is an LVA
    if addr <= 0:
        return False

    target_addr = self.jmprel[name].linked_addr
    if sanity_check:
        try:
            references_target = self._block_references_addr(self._block(addr), target_addr)
        except (pyvex.PyVEXError, KeyError):
            # could not lift or inspect the block: treat it as a failed check
            return False
        if not references_target:
            return False

    self._plt[name] = AT.from_lva(addr, self).to_rva()
    return True
def _load_plt(self):
    """
    Fill ``self._plt`` (import name -> stub RVA) using a cascade of heuristics.

    Every candidate is registered through ``_add_plt_stub``. The attempts are, in order:
    symbol addresses (ARM/AArch64/MIPS), pre-relocation GOT contents (x86/amd64), packed 16-byte
    stubs (PPC32), following control flow from the entry point to ``__libc_start_main``, and
    finally scanning the PLT sections and walking from one known stub to the next.

    Side effect: ``self.jmprel`` is replaced by an OrderedDict sorted by ``linked_addr``.
    Returns None; lifting failures and timeouts inside the scans are swallowed.
    """
    # The main problem here is that there's literally no good way to do this.
    # like, I read through the binutils source and they have a hacked up solution for each arch
    # that performs actual comparisons against the machine code in the plt section.
    # it's pretty bad.
    # we sanity-check all our attempts by requiring that the block lifted at the given address
    # references the GOT slot for the symbol.

    # NOTE: each of the first three checks replaces the list; only .plt.sec is appended.
    plt_secs = []
    if ".plt" in self.sections_map:
        plt_secs = [self.sections_map[".plt"]]
    if ".plt.got" in self.sections_map:
        plt_secs = [self.sections_map[".plt.got"]]
    if ".MIPS.stubs" in self.sections_map:
        plt_secs = [self.sections_map[".MIPS.stubs"]]
    if ".plt.sec" in self.sections_map:
        plt_secs.append(self.sections_map[".plt.sec"])

    self.jmprel = OrderedDict(sorted(self.jmprel.items(), key=lambda x: x[1].linked_addr))
    # only relocations whose symbol is not an object/section/other symbol are stub candidates
    func_jmprel = OrderedDict(
        (k, v)
        for k, v in self.jmprel.items()
        if v.symbol.type not in (SymbolType.TYPE_OBJECT, SymbolType.TYPE_SECTION, SymbolType.TYPE_OTHER)
    )

    # ATTEMPT 1: some arches will just leave the plt stub addr in the import symbol
    if self.arch.name in ("ARM", "ARMEL", "ARMHF", "ARMCortexM", "AARCH64", "MIPS32", "MIPS64"):
        for name, reloc in func_jmprel.items():
            if not plt_secs or any(plt_sec.contains_addr(reloc.symbol.linked_addr) for plt_sec in plt_secs):
                # without any known PLT section the sanity check is skipped
                self._add_plt_stub(name, reloc.symbol.linked_addr, sanity_check=bool(plt_secs))

    # ATTEMPT 2: on intel chips the data in the got slot pre-relocation points to a lazy-resolver
    # stub immediately after the plt stub
    if self.arch.name in ("X86", "AMD64"):
        for name, reloc in func_jmprel.items():
            try:
                self._add_plt_stub(name, self.memory.unpack_word(reloc.relative_addr) - 6, sanity_check=True)
            except KeyError:
                pass

        # do another sanity check
        # (two names mapping to the same stub address means the guess was wrong: discard everything)
        if len(set(self._plt.values())) != len(self._plt):
            self._plt = {}

    # ATTEMPT 3: one ppc scheme I've seen is that there are 16-byte stubs packed together
    # right before the resolution stubs.
    if self.arch.name in ("PPC32",):
        resolver_stubs = sorted(
            (self.memory.unpack_word(reloc.relative_addr), name) for name, reloc in func_jmprel.items()
        )
        if resolver_stubs:
            stubs_table = resolver_stubs[0][0] - 16 * len(resolver_stubs)
            for i, (_, name) in enumerate(resolver_stubs):
                self._add_plt_stub(name, stubs_table + i * 16)

    if len(self._plt) == len(func_jmprel):
        # real quick, bail out before shit hits the fan
        return

    # ATTEMPT 4:
    # ok. time to go in on this.
    # try to find a single plt stub, anywhere. if we already have one, use that, otherwise
    # try to scan forward from _start to __libc_start_main to find that one.
    # then, scan forward and backward from that stub to find the rest of them. yikes!

    # keep a timer so we don't get stuck. keep this short and sweet.
    def tick():
        # the budget lives on the function object so callers can reset it between scans
        tick.bailout_timer -= 1
        if tick.bailout_timer <= 0:
            raise TimeoutError()

    tick.bailout_timer = 5

    def scan_forward(addr, name, push=False):
        # Scan forward from `addr` for a block referencing the GOT slot of `name` (or of any
        # name when a list/tuple is given) and register it. Returns the result of
        # _add_plt_stub, or False on timeout / lift failure.
        names = [name] if type(name) not in (list, tuple) else name

        def block_is_good(blk):
            # records which name matched in block_is_good.name
            all_constants = {c.value for c in blk.all_constants}
            for name in names:
                gotslot = func_jmprel[name].linked_addr
                if gotslot in all_constants:
                    block_is_good.name = name
                    return True
            return False

        block_is_good.name = None

        def is_endbr(addr):
            # compares the 4 bytes at addr against two fixed x86 byte patterns
            # (presumably endbr64 / endbr32 - confirm)
            if self.arch.name not in ("X86", "AMD64"):
                return False
            return self._block_bytes(addr, 4) in (b"\xf3\x0f\x1e\xfa", b"\xf3\x0f\x1e\xfb")

        instruction_alignment = self.arch.instruction_alignment
        if self.arch.name in ("ARMEL", "ARMHF"):
            # hard code alignment for ARM code
            instruction_alignment = 4

        try:
            while True:
                tick()
                bb = self._block(addr, skip_stmts=False)

                step_forward = False
                # the block shouldn't touch any cc_* registers
                if self.arch.name in ("X86", "AMD64", "ARMEL", "ARMHF", "ARMCortexM"):
                    cc_regs = {
                        self.arch.registers["cc_op"][0],
                        self.arch.registers["cc_ndep"][0],
                        self.arch.registers["cc_dep1"][0],
                        self.arch.registers["cc_dep2"][0],
                    }
                    if any(
                        [isinstance(stmt, pyvex.IRStmt.Put) and stmt.offset in cc_regs for stmt in bb.statements]
                    ):
                        step_forward = True
                    elif any(
                        [
                            isinstance(stmt, pyvex.IRStmt.WrTmp)
                            and isinstance(stmt.data, pyvex.IRExpr.Get)
                            and stmt.data.offset in cc_regs
                            for stmt in bb.statements
                        ]
                    ):
                        step_forward = True

                if step_forward:
                    # only steps one instruction forward
                    addr += instruction_alignment
                    continue

                if block_is_good(bb):
                    break
                if bb.jumpkind == "Ijk_NoDecode":
                    addr += instruction_alignment
                else:
                    addr += bb.size

            # "push" means try to increase the address as far as we can without regard for semantics
            # the alternative is to only try to lop off nop instructions
            # make sure we don't push through endbr
            if push:
                if block_is_good.name is None:
                    raise ValueError("block_is_good.name cannot be None.")
                old_name = block_is_good.name
                block = self._block(addr, skip_stmts=True)
                if len(block.instruction_addresses) > 1 and not is_endbr(block.instruction_addresses[0]):
                    for instruction in block.instruction_addresses[1:]:
                        candidate_block = self._block(instruction, skip_stmts=False)
                        if block_is_good(candidate_block) and block_is_good.name == old_name:
                            addr = candidate_block.addr
                            if is_endbr(instruction):
                                break
                        else:
                            break
                # probing candidates may have overwritten the matched name; restore it
                block_is_good.name = old_name
            else:
                cont = True
                while cont:
                    cont = False
                    seen_imark = False
                    # we need to access bb.statements
                    if bb.statements is None:
                        # relift without skipping statements
                        bb = self._block(bb.addr, skip_stmts=False)
                    for stmt in bb.statements:
                        if stmt.tag == "Ist_IMark":
                            if seen_imark:
                                # good????
                                bb = self._block(stmt.addr, skip_stmts=False)
                                if block_is_good(bb):
                                    addr = stmt.addr
                                    cont = True
                                break
                            else:
                                seen_imark = True
                        elif stmt.tag == "Ist_Put" and stmt.offset == bb.offsIP:
                            continue
                        else:
                            # there's some behavior, not good
                            break

            return self._add_plt_stub(block_is_good.name, addr)
        except (TimeoutError, ValueError, KeyError, pyvex.PyVEXError):
            return False

    if not self._plt and "__libc_start_main" in func_jmprel and self.entry != 0:
        # try to scan forward through control flow to find __libc_start_main!
        try:
            last_jk = None
            addr = self.entry
            bb = self._block(addr, skip_stmts=True)
            target = bb.default_exit_target
            while target is not None:
                tick()
                last_jk = bb.jumpkind
                addr = target
                bb = self._block(addr, skip_stmts=True)
                target = bb.default_exit_target

            # the walk ended at a block with no constant default target; if it was reached
            # through a call, treat it as the __libc_start_main stub
            if last_jk == "Ijk_Call":
                self._add_plt_stub("__libc_start_main", addr)
        except (TimeoutError, KeyError, pyvex.PyVEXError):
            pass

    # if func_jmprel.keys()[0] not in self._plt:
    if not set(func_jmprel.keys()).intersection(self._plt.keys()):
        # Check if we have a .plt section
        if not plt_secs:
            # WAHP WAHP
            return

    # some binaries have a bunch of CET stubs before the PLTs, and
    # in the worst case we might have to skip over each one of
    # these... so we set the bailout timer accordingly
    def initial_bailout_timer(func_jmprel):
        return len(func_jmprel) + 5

    if plt_secs:
        # LAST TRY: Find the first block to references ANY GOT slot
        tick.bailout_timer = initial_bailout_timer(func_jmprel)
        scan_forward(min(plt_sec.vaddr for plt_sec in plt_secs), list(func_jmprel.keys()), push=True)

    if not self._plt:
        # \(_^^)/
        return

    # if we've gotten this far there is at least one plt slot address known, guaranteed.
    # (name, LVA-or-None) for every candidate, in GOT-slot order
    plt_hitlist = [
        (name, AT.from_rva(self._plt[name], self).to_lva() if name in self._plt else None) for name in func_jmprel
    ]

    name, addr = plt_hitlist[0]
    if addr is None and plt_secs:
        # try to resolve the very first entry
        tick.bailout_timer = initial_bailout_timer(func_jmprel)
        guessed_addr = min(plt_sec.vaddr for plt_sec in plt_secs)
        scan_forward(guessed_addr, name, push=True)
        if name in self._plt:
            # resolved :-)
            plt_hitlist[0] = (name, AT.from_rva(self._plt[name], self).to_lva())

    next_addr = None
    for i, (name, addr) in enumerate(plt_hitlist):
        if addr is None:
            if next_addr is None:
                continue
            tick.bailout_timer = 5
            scan_forward(next_addr, name, push=True)
            if name in self._plt:
                addr = AT.from_rva(self._plt[name], self).to_lva()

        if addr is not None:
            b0 = self._block(addr, skip_stmts=True)
            stub_size = b0.size
            # a stub may span two blocks when the first one simply falls through
            if isinstance(b0.next, pyvex.expr.Const) and b0.next.con.value == addr + b0.size:
                b1 = self._block(addr + b0.size, skip_stmts=True)
                stub_size += b1.size
            next_addr = addr + stub_size
@property
def plt(self):
    """
    Maps names to addresses.

    Built on first access from ``self._plt`` (name -> RVA) by converting each RVA to an MVA,
    then cached in ``self._cached_plt``.
    """
    cached = self._cached_plt
    if cached is None:
        cached = {}
        for name, rva in self._plt.items():
            cached[name] = AT.from_rva(rva, self).to_mva()
        self._cached_plt = cached
    return cached
@property
def reverse_plt(self):
    """
    Maps addresses to names.

    Built on first access from ``self._plt`` (name -> RVA) with each RVA converted to an MVA
    and used as the key, then cached in ``self._cached_reverse_plt``.
    """
    cached = self._cached_reverse_plt
    if cached is None:
        cached = {}
        for name, rva in self._plt.items():
            cached[AT.from_rva(rva, self).to_mva()] = name
        self._cached_reverse_plt = cached
    return cached
@property
def is_ppc64_abiv1(self):
    """
    Returns whether the arch is PowerPC64 ABIv1.

    The ABI is read from the low two bits of ``self.elfflags``; values 0 and 1 count as ABIv1.

    :return: True if PowerPC64 ABIv1, False otherwise.
    """
    if self.arch.name != "PPC64":
        return False
    abi_bits = self.elfflags & 3
    return abi_bits < 2
@property
def is_ppc64_abiv2(self):
    """
    Returns whether the arch is PowerPC64 ABIv2.

    The ABI is read from the low two bits of ``self.elfflags``; only the value 2 counts as ABIv2.

    :return: True if PowerPC64 ABIv2, False otherwise.
    """
    if self.arch.name != "PPC64":
        return False
    abi_bits = self.elfflags & 3
    return abi_bits == 2
@property
def ppc64_initial_rtoc(self):
    """
    Get initial rtoc value for PowerPC64 architecture.

    :return: The stored ABIv1 value, the guessed ABIv2 value, or None when neither ABI applies.
    """
    if self.is_ppc64_abiv1:
        return self._ppc64_abiv1_initial_rtoc
    if self.is_ppc64_abiv2:
        return self._ppc64_abiv2_get_initial_rtoc()
    return None
def _ppc64_abiv1_entry_fix(self):
    """
    On PowerPC64, the e_flags elf header entry's lowest two bits determine the ABI type. in ABIv1, the entry point
    given in the elf headers is not actually the entry point, but rather the address in memory where there
    exists a pointer to the entry point.

    For ABIv1 objects this replaces ``self._entry`` with the word stored at the original entry
    address and stores the word 8 bytes after it as the initial rtoc. Other objects are untouched.
    """
    if not self.is_ppc64_abiv1:
        return

    descriptor_lva = self._entry
    entry_rva = AT.from_lva(descriptor_lva, self).to_rva()
    self._entry = self.memory.unpack_word(entry_rva)
    rtoc_rva = AT.from_lva(descriptor_lva + 8, self).to_rva()
    self._ppc64_abiv1_initial_rtoc = self.memory.unpack_word(rtoc_rva)
def _ppc64_abiv2_get_initial_rtoc(self):
    """
    Guess initial table of contents value for PPC64 based on .got section.

    According to PPC64 ABIv2 Specification (Section 3.3): "the TOC pointer
    register typically points to the beginning of the .got section +
    0x8000." Guess the initial rtoc value based on that to handle the
    typical case.

    :return: ``.got`` start address + 0x8000, or None (with a warning) when there is no ``.got``.
    """
    got_section = self.sections_map.get(".got")
    if got_section is not None:
        return got_section.vaddr + 0x8000

    l.warning("Failed to guess initial rtoc value due to missing .got")
    return None
@staticmethod
def extract_soname(path):
    """
    Look up the DT_SONAME of the ELF at ``path``.

    :param path:    Passed to ``stream_or_path``; presumably a filesystem path or an already
                    open binary stream (confirm against that helper).
    :return:        The decoded DT_SONAME if a PT_DYNAMIC segment provides one; otherwise the
                    basename of ``path`` when it is a ``str`` and a PT_DYNAMIC segment was seen;
                    otherwise None (including when pyelftools raises ELFError).
    """
    with stream_or_path(path) as f:
        try:
            e = elftools.elf.elffile.ELFFile(f)
            for seg in e.iter_segments():
                # a PT_NULL program header ends the search
                if seg.header.p_type == "PT_NULL":
                    break
                elif seg.header.p_type == "PT_DYNAMIC":
                    for tag in seg.iter_tags():
                        if tag.entry.d_tag == "DT_SONAME":
                            return maybedecode(tag.soname)
                    # dynamic segment without DT_SONAME: fall back to the file name
                    # NOTE(review): exact type check, so str subclasses do not take this path
                    if type(path) is str:
                        return os.path.basename(path)
        except elftools.common.exceptions.ELFError:
            # unparseable ELF: treated the same as "no soname"
            pass
    return None
@staticmethod
def get_text_offset(path):
    """
    Offset of .text in the binary.

    :param path:    Handed to ``stream_or_path`` to obtain the stream that is parsed.
    :return:        The ``sh_offset`` header field of the section named ".text".
    """
    with stream_or_path(path) as stream:
        reader = elftools.elf.elffile.ELFFile(stream)
        text_section = reader.get_section_by_name(".text")
        return text_section.header.sh_offset

View File

@@ -0,0 +1,89 @@
from ..region import Segment, Section
def maybedecode(string):
    """
    Return ``string`` unchanged if it is exactly a ``str``; otherwise return ``string.decode()``.
    """
    if type(string) is str:
        return string
    return string.decode()
class ELFSegment(Segment):
    """
    Represents a segment for the ELF format.

    Built from a pyelftools-style segment object: the program header's ``p_flags`` is kept in
    ``flags`` and the offset/vaddr/filesz/memsz fields are forwarded to ``Segment``.
    """

    def __init__(self, readelf_seg, relro=False):
        header = readelf_seg.header
        self.flags = header.p_flags
        self.relro = relro
        super().__init__(header.p_offset, header.p_vaddr, header.p_filesz, header.p_memsz)

    @property
    def is_readable(self):
        # bit 0x4 of p_flags
        return bool(self.flags & 4)

    @property
    def is_writable(self):
        # bit 0x2 of p_flags
        return bool(self.flags & 2)

    @property
    def is_executable(self):
        # bit 0x1 of p_flags
        return bool(self.flags & 1)

    @property
    def is_relro(self):
        # whatever the creator passed as ``relro``
        return self.relro
class ELFSection(Section):
    """
    Represents a section for the ELF format.

    Built from a pyelftools-style section object. The section address handed to ``Section`` is
    ``sh_addr + remap_offset``; the remaining header fields are copied onto the instance.
    """

    # sh_flags bits tested by the properties below
    SHF_WRITE = 0x1
    SHF_ALLOC = 0x2
    SHF_EXECINSTR = 0x4
    SHF_STRINGS = 0x20
    # sh_type value that marks an inactive section
    SHT_NULL = "SHT_NULL"

    def __init__(self, readelf_sec, remap_offset=0):
        header = readelf_sec.header
        super().__init__(
            maybedecode(readelf_sec.name),
            header.sh_offset,
            header.sh_addr + remap_offset,
            header.sh_size,
        )

        self.type = header.sh_type
        self.entsize = header.sh_entsize
        self.flags = header.sh_flags
        self.link = header.sh_link
        self.info = header.sh_info
        self.align = header.sh_addralign
        self.remap_offset = remap_offset

    def _has_flag(self, bit):
        # True when ``bit`` is set in sh_flags
        return bool(self.flags & bit)

    @property
    def is_readable(self):
        return True

    @property
    def is_active(self):
        return self.type != self.SHT_NULL

    @property
    def is_writable(self):
        return self._has_flag(self.SHF_WRITE)

    @property
    def occupies_memory(self):
        return self._has_flag(self.SHF_ALLOC) and self.memsize > 0

    @property
    def is_executable(self):
        return self._has_flag(self.SHF_EXECINSTR)

    @property
    def is_strings(self):
        return self._has_flag(self.SHF_STRINGS)

    @property
    def only_contains_uninitialized_data(self):
        return self.type == "SHT_NOBITS"

View File

@@ -0,0 +1,52 @@
import os
import logging
import importlib
import archinfo
from collections import defaultdict
from ...relocation import Relocation
ALL_RELOCATIONS = defaultdict(dict)
complaint_log = set()
path = os.path.dirname(os.path.abspath(__file__))
l = logging.getLogger(name=__name__)
def load_relocations():
    """
    Import every sibling ``*.py`` module and register its relocation classes.

    A module takes part only if it defines a module-level ``arch``. For each of its attributes
    whose name is a key of ``archinfo.defines`` and whose value is a ``Relocation`` subclass,
    the class is stored as ``ALL_RELOCATIONS[arch][archinfo.defines[name]]``.
    """
    for filename in os.listdir(path):
        if filename == "__init__.py" or not filename.endswith(".py"):
            continue

        module_name = filename[:-3]
        l.debug("Importing ELF relocation module: %s", module_name)
        module = importlib.import_module(".%s" % module_name, "cle.backends.elf.relocation")

        try:
            arch_name = module.arch
        except AttributeError:
            # helper module without an ``arch`` marker: nothing to register
            continue

        for item_name in dir(module):
            if item_name not in archinfo.defines:
                continue
            item = getattr(module, item_name)
            if isinstance(item, type) and issubclass(item, Relocation):
                ALL_RELOCATIONS[arch_name][archinfo.defines[item_name]] = item
def get_relocation(arch, r_type):
    """
    Look up the relocation class registered for ``(arch, r_type)``.

    :param arch:    Architecture name used as the first-level key of ``ALL_RELOCATIONS``.
    :param r_type:  Numeric relocation type; 0 is never looked up.
    :return:        The registered class, or None. An unknown pair is warned about once and
                    remembered in ``complaint_log``.
    """
    if r_type == 0:
        return None

    registered = ALL_RELOCATIONS[arch]
    if r_type in registered:
        return registered[r_type]

    complaint = (arch, r_type)
    if complaint not in complaint_log:
        complaint_log.add(complaint)
        l.warning("Unknown reloc %d on %s", r_type, arch)
    return None
load_relocations()

View File

@@ -0,0 +1,69 @@
import logging
from . import generic
l = logging.getLogger(name=__name__)
arch = "AMD64"
# The AMD64 relocation types below add no logic of their own: each one is bound to the
# generic implementation named in its base-class list.
class R_X86_64_64(generic.GenericAbsoluteAddendReloc):
    pass


class R_X86_64_COPY(generic.GenericCopyReloc):
    pass


class R_X86_64_RELATIVE(generic.GenericRelativeReloc):
    pass


class R_X86_64_IRELATIVE(generic.GenericIRelativeReloc):
    pass


# GLOB_DAT and JUMP_SLOT share the same generic jump-slot implementation.
class R_X86_64_GLOB_DAT(generic.GenericJumpslotReloc):
    pass


class R_X86_64_JUMP_SLOT(generic.GenericJumpslotReloc):
    pass


class R_X86_64_DTPMOD64(generic.GenericTLSModIdReloc):
    pass


class R_X86_64_DTPOFF64(generic.GenericTLSDoffsetReloc):
    pass


class R_X86_64_TPOFF64(generic.GenericTLSOffsetReloc):
    pass


# The remaining types combine generic.RelocTruncate32Mixin with a generic value computation.
# check_sign_extend / check_zero_extend are class-level switches; presumably the mixin reads
# them to validate the truncated value - confirm in generic.py.
class R_X86_64_PC32(generic.RelocTruncate32Mixin, generic.GenericPCRelativeAddendReloc):
    check_sign_extend = True


class R_X86_64_32(generic.RelocTruncate32Mixin, generic.GenericAbsoluteAddendReloc):
    check_zero_extend = True


class R_X86_64_32S(generic.RelocTruncate32Mixin, generic.GenericAbsoluteAddendReloc):
    check_sign_extend = True


class R_X86_64_PLT32(generic.RelocTruncate32Mixin, generic.GenericPCRelativeAddendReloc):
    check_sign_extend = True


# The GOT-relative variants additionally mix in generic.RelocGOTMixin.
class R_X86_64_GOTPCREL(generic.RelocGOTMixin, generic.RelocTruncate32Mixin, generic.GenericPCRelativeAddendReloc):
    check_sign_extend = True


class R_X86_64_GOTPCRELX(generic.RelocGOTMixin, generic.RelocTruncate32Mixin, generic.GenericPCRelativeAddendReloc):
    check_sign_extend = True


class R_X86_64_REX_GOTPCRELX(generic.RelocGOTMixin, generic.RelocTruncate32Mixin, generic.GenericPCRelativeAddendReloc):
    check_sign_extend = True

View File

@@ -0,0 +1,489 @@
import logging
from . import generic
from .elfreloc import ELFReloc
from ....errors import CLEOperationError
l = logging.getLogger(name=__name__)
arch = "ARM"
# Reference: "ELF for the ARM Architecture ABI r2.10"
# http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044e/IHI0044E_aaelf.pdf
def _applyReloc(inst, result, mask=0xFFFFFFFF):
    """
    Merge ``result`` into ``inst`` under ``mask``.

    :param inst:    Original instruction/data word.
    :param result:  Value to insert; it must not have any bit set outside ``mask``.
    :param mask:    Bits of ``inst`` that are replaced by ``result``.
    :return:        ``inst`` with the masked bits replaced, or 0 (after logging a warning) when
                    ``result`` does not fit inside ``mask``.
    """
    if result & ~mask:
        l.warning("Relocation failed: %r", ValueError("result & ~mask is not 0."))
        return 0  # worst case, you hook it yourself

    kept_bits = inst & ~mask
    new_bits = result & mask
    return kept_bits | new_bits
def _isThumbFunc(symbol, addr):
    """
    Tell whether ``addr`` designates a Thumb function of ``symbol``.

    :return: False when ``addr`` is even; otherwise the value of ``symbol.is_function``.
    """
    if addr % 2 != 1:
        return False
    return symbol.is_function
class R_ARM_CALL(ELFReloc):
    """
    Relocate R_ARM_CALL symbols via instruction modification. It additionally
    handles R_ARM_PC24 and R_ARM_JUMP24. The former is deprecated and is now
    just the same as R_ARM_CALL.

    R_ARM_JUMP24 doesn't need the Thumb check. Technically, if the Thumb check
    succeeds on R_ARM_JUMP24, it's a bad call that shouldn't have been generated
    by the linker, so we may as well as just treat it like R_ARM_CALL.

    - Class: Static
    - Type: ARM (R_ARM_CALL, R_ARM_JUMP24); Deprecated (R_ARM_PC24)
    - Code: 1 (R_ARM_PC24), 28 (R_ARM_CALL), 29 (R_ARM_JUMP24)
    - Operation: ((S + A) | T) - P
      - S is the address of the symbol
      - A is the addend
      - P is the target location (place being relocated)
      - T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction

    When the target is a Thumb function the instruction is rewritten as BLX (immediate):
    the top byte becomes ``0b1111101H`` and the low 24 bits hold the word offset, with H
    carrying bit 1 of the byte offset.
    """

    @property
    def value(self):
        """
        :return: The relocated 32-bit instruction word, or 0 if ``_applyReloc`` rejects it.
        """
        P = self.rebased_addr  # Location of this instruction
        A = inst = self.addend  # The instruction
        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to
        T = _isThumbFunc(self.symbol, S)

        if inst & 0x00800000:
            A |= 0xFF000000  # Sign extend to 32-bits
        # Bits of A above the 24-bit field end up above bit 25 after the shift and are
        # discarded by the imm24 mask below.
        result = ((S + (A << 2)) | T) - P  # Do the initial work
        imm24 = (result & 0x03FFFFFE) >> 2  # Sign_extend(inst[25:2])

        if T:  # Do Thumb relocation
            # The opcode byte has to sit in bits 31:24. Passing the unshifted byte together
            # with a 0xFF000000 mask made _applyReloc reject every Thumb target and return 0.
            bit_h = (result & 0x02) >> 1
            result = _applyReloc(inst, ((0xFA | bit_h) << 24) | imm24, 0xFFFFFFFF)
        else:  # Do ARM relocation
            mask = 0xFFFFFF
            result = _applyReloc(inst, imm24, mask)

        l.debug("%s relocated as R_ARM_CALL with new instruction: %#x", self.symbol.name, result)
        return result
class R_ARM_PREL31(ELFReloc):
    """
    Relocate R_ARM_PREL31 symbols via instruction modification. The difference
    between this and R_ARM_CALL/R_ARM_PC24/R_ARM_JUMP24 is that it's a data
    relocation

    - Class: Static
    - Type: Data
    - Code: 42
    - Operation: ((S + A) | T) - P
      - S is the address of the symbol
      - A is the addend
      - P is the target location (place being relocated)
      - T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction

    Only bits [30:0] of the relocated word take part; bit 31 is carried over unchanged.
    """

    @property
    def value(self):
        """
        :return: The relocated word: original bit 31 plus the new 31-bit PC-relative offset.
        """
        P = self.rebased_addr  # Location of this instruction
        word = self.addend  # The word currently stored at P
        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to
        T = _isThumbFunc(self.symbol, S)

        mask = 0x7FFFFFFF
        # The addend is the 31-bit field, sign-extended from bit 30. (Testing bit 24 and OR-ing
        # in 0xF1000000, as was done before, corrupted addend bits 28-30 and bit 31 of the word.)
        A = word & mask
        if A & 0x40000000:
            A -= 0x80000000

        result = ((S + A) | T) - P  # Do the initial work
        rel31 = result & mask
        result = _applyReloc(word, rel31, mask)  # keeps bit 31 of the original word
        l.debug("%s relocated as R_ARM_PREL31 to: 0x%x", self.symbol.name, result)
        return result
class R_ARM_REL32(ELFReloc):
    """
    Relocate R_ARM_REL32 symbols. This is essentially the same as
    generic.GenericPCRelativeAddendReloc with the addition of a check
    for whether or not the target is Thumb.

    - Class: Static
    - Type: Data
    - Code: 3
    - Operation: ((S + A) | T) - P
      - S is the address of the symbol
      - A is the addend
      - P is the target location (place being relocated)
      - T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
    """

    @property
    def value(self):
        place = self.rebased_addr  # P
        addend = self.addend  # A
        target = self.resolvedby.rebased_addr  # S
        thumb_bit = _isThumbFunc(self.symbol, target)  # T

        relocated = ((target + addend) | thumb_bit) - place
        l.debug("%s relocated as R_ARM_REL32 to: 0x%x", self.symbol.name, relocated)
        return relocated
class R_ARM_ABS32(ELFReloc):
    """
    Relocate R_ARM_ABS32 symbols. This is essentially the same as
    generic.GenericAbsoluteAddendReloc with the addition of a check
    for whether or not the target is Thumb.

    - Class: Static
    - Type: Data
    - Code: 2
    - Operation: (S + A) | T
      - S is the address of the symbol
      - A is the addend
      - T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
    """

    @property
    def value(self):
        addend = self.addend  # A
        target = self.resolvedby.rebased_addr  # S
        thumb_bit = _isThumbFunc(self.symbol, target)  # T

        relocated = (target + addend) | thumb_bit
        l.debug("%s relocated as R_ARM_ABS32 to: 0x%x", self.symbol.name, relocated)
        return relocated
class R_ARM_MOVW_ABS_NC(ELFReloc):
    """
    Relocate R_ARM_MOVW_ABS_NC symbols.

    - Class: Static
    - Type: Instruction
    - Code: 43
    - Operation: (S + A) | T
      - S is the address of the symbol
      - A is the addend
      - T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction

    The low 16 bits of the result are written back into the instruction's split immediate
    (inst[19:16] and inst[11:0]).
    """

    @property
    def value(self):
        inst = self.addend  # The instruction
        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to
        T = _isThumbFunc(self.symbol, S)

        # initial addend is formed by interpreting the 16-bit literal field
        # of the instruction as a signed value
        A = ((inst >> 4) & 0xF000) | (inst & 0xFFF)
        if A & 0x8000:
            A -= 0x10000  # two's complement

        low16 = ((S + A) | T) & 0xFFFF

        # inst modification: clear inst[19:16] and inst[11:0], then refill them
        inst = (inst & 0xFFF0F000) | ((low16 >> 12) << 16) | (low16 & 0xFFF)
        l.debug("%s relocated as R_ARM_MOVW_ABS_NC to: 0x%x", self.symbol.name, inst)
        return inst
class R_ARM_MOVT_ABS(ELFReloc):
    """
    Relocate R_ARM_MOVT_ABS symbols.

    - Class: Static
    - Type: Instruction
    - Code: 44
    - Operation: S + A
      - S is the address of the symbol
      - A is the addend

    Bits [31:16] of the result are written back into the instruction's split immediate
    (inst[19:16] and inst[11:0]).
    """

    @property
    def value(self):
        inst = self.addend  # The instruction
        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to

        # initial addend is formed by interpreting the 16-bit literal field
        # of the instruction as a signed value
        A = ((inst >> 4) & 0xF000) | (inst & 0xFFF)
        if A & 0x8000:
            A -= 0x10000  # two's complement

        high16 = ((S + A) & 0xFFFF0000) >> 16

        # inst modification: clear inst[19:16] and inst[11:0], then refill them
        inst = (inst & 0xFFF0F000) | ((high16 >> 12) << 16) | (high16 & 0xFFF)
        l.debug("%s relocated as R_ARM_MOVT_ABS to: 0x%x", self.symbol.name, inst)
        return inst
class R_ARM_THM_CALL(ELFReloc):
    """
    Relocate R_ARM_THM_CALL symbols via instruction modification.

    - Class: Static
    - Type: ARM (R_ARM_THM_CALL)
    - Code: 10
    - Operation: ((S + A) | T) - P
      - S is the address of the symbol
      - A is the addend
      - P is the target location (place being relocated)
      - T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction (This bit is entirely irrelevant because the 1-bit of the address gets shifted off in the encoding)
    - Encoding: See http://hermes.wings.cs.wisc.edu/files/Thumb-2SupplementReferenceManual.pdf
      - Page 71 (3-31) has the chart
      - It appears that it mistakenly references the I1 and I2 bits as J1 and J2 in the chart (see the notes at the bottom of the page -- the ranges don't make sense)
      - However, the J1/J2 bits are XORed with !S bit in this case (see vex implementation: https://github.com/angr/vex/blob/6d1252c7ce8fe8376318b8f8bb8034058454c841/priv/guest_arm_toIR.c#L19219 )
      - Implementation appears correct with the bits placed into offset[23:22]
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # original 4 instruction bytes; loaded on the first access to ``value`` and reused after
        self._insn_bytes = None

    def resolve_symbol(self, solist, **kwargs):
        # always request Thumb resolution from the base implementation
        kwargs["thumb"] = True
        # NOTE(review): the base implementation's return value is not propagated (this returns None)
        super().resolve_symbol(solist, **kwargs)

    @property
    def value(self):
        """
        :return:    The relocated instruction as a 32-bit word laid out for a little-endian store.
        :raises CLEOperationError: if the computed offset does not pass the range check below.
        """
        P = self.rebased_addr  # Location of this instruction
        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to
        T = _isThumbFunc(self.symbol, S)
        A = 0

        # Deconstruct the instruction:
        #   Because this 4-byte instruction is treated as two 2-byte instructions,
        #   the bytes are in the order `b3 b4 b1 b2`, where b4 is the most significant.

        if self._insn_bytes is None:
            self._insn_bytes = self.owner.memory.load(self.relative_addr, 4)

        # first halfword ends up in inst[31:16], second halfword in inst[15:0]
        hi = (self._insn_bytes[1] << 8) | self._insn_bytes[0]
        lo = (self._insn_bytes[3] << 8) | self._insn_bytes[2]
        inst = (hi << 16) | lo

        def gen_mask(n_bits, first_bit):
            """
            Builds a mask that captures n_bits, where the first bit captured is first_bit
            """
            return ((1 << n_bits) - 1) << first_bit

        if self.is_rela:
            A = self.addend
        else:
            # Build A (the initial addend)

            A |= (inst & gen_mask(11, 0)) << 1  # A[11:1]  = inst[10:0] (inclusive)
            A |= ((inst & gen_mask(10, 16)) >> 16) << 12  # A[21:12] = inst[25:16]

            sign_bit = bool(inst & gen_mask(1, 26)) & 1  # sign_bit = inst[26]
            J1 = (bool(inst & gen_mask(1, 13)) & 1) ^ (not sign_bit)  # J1 = inst[13] ^ !sign
            J2 = (bool(inst & gen_mask(1, 11)) & 1) ^ (not sign_bit)  # J2 = inst[11] ^ !sign
            A |= J1 << 23  # A[23] = J1
            A |= J2 << 22  # A[22] = J2
            # NOTE(review): this mask keeps bits 22:0 only, so the A[23] bit set just above is
            # cleared again before the sign fill - confirm this is intended.
            A &= 0x7FFFFF
            if sign_bit:
                A |= 0xFF800000

        # Compute X, the new offset, from the symbol addr, S, the addend, A,
        # the thumb flag, T, and PC, P.

        x = (((S + A) | T) - P) & 0xFFFFFFFF  # Also mask to 32 bits

        # Ensure jump is in range
        # (bits 31:23 of x must be all zeros or all ones)

        if x & 0xFF800000 != 0 and x & 0xFF800000 != 0xFF800000:
            raise CLEOperationError(
                "Jump target out of range for reloc R_ARM_THM_CALL (+- 2^23). "
                "This may be due to SimProcedures being allocated outside the jump range. "
                "If you believe this is the case, set 'rebase_granularity'=0x1000 in the "
                "load options."
            )

        # Rebuild the instruction, first clearing out any previously set offset bits

        #                     offset     1 2  offset
        #             11110S [21:12] 11J?J [11:1]     (if ? is 1, BL; if ? is 0, BLX)
        inst &= ~0b00000111111111110010111111111111
        #          |       |       |       |       |
        #         32      24      16       8       0

        sign_bit = bool(x & gen_mask(1, 24)) & 1
        J1 = (bool(x & gen_mask(1, 23)) & 1) ^ (not sign_bit)
        J2 = (bool(x & gen_mask(1, 22)) & 1) ^ (not sign_bit)

        inst |= sign_bit << 26
        inst |= J1 << 13
        inst |= J2 << 11

        inst |= (x & gen_mask(11, 1)) >> 1
        inst |= ((x & gen_mask(10, 12)) >> 12) << 16

        # Put it back into <little endian short> <little endian short> format

        raw = ((inst & 0x00FF0000) >> 16, (inst & 0xFF000000) >> 24, (inst & 0x00FF), (inst & 0xFF00) >> 8)

        # The relocation handler expects a little-endian result, so flip it around.

        result = (raw[3] << 24) | (raw[2] << 16) | (raw[1] << 8) | raw[0]

        l.debug("%s relocated as R_ARM_THM_CALL with new instruction: %#x", self.symbol.name, result)
        return result
# The ARM relocation types below add no logic of their own: each one simply reuses the
# implementation named in its base-class list.
class R_ARM_COPY(generic.GenericCopyReloc):
    pass


# GLOB_DAT and JUMP_SLOT share the same generic jump-slot implementation.
class R_ARM_GLOB_DAT(generic.GenericJumpslotReloc):
    pass


class R_ARM_JUMP_SLOT(generic.GenericJumpslotReloc):
    pass


class R_ARM_RELATIVE(generic.GenericRelativeReloc):
    pass


# The *_NOI variants use the plain generic computations rather than the Thumb-aware
# R_ARM_ABS32 / R_ARM_REL32 classes defined in this module.
class R_ARM_ABS32_NOI(generic.GenericAbsoluteAddendReloc):
    pass


class R_ARM_REL32_NOI(generic.GenericPCRelativeAddendReloc):
    pass


class R_ARM_TLS_DTPMOD32(generic.GenericTLSModIdReloc):
    pass


class R_ARM_TLS_DTPOFF32(generic.GenericTLSDoffsetReloc):
    pass


class R_ARM_TLS_TPOFF32(generic.GenericTLSOffsetReloc):
    pass


# JUMP24 and PC24 are handled exactly like R_ARM_CALL.
class R_ARM_JUMP24(R_ARM_CALL):
    pass


class R_ARM_PC24(R_ARM_CALL):
    pass


# EDG says: Implementing these the easy way.
# Inaccuracies may exist.  This is ARM, after all.
# (All three Thumb jump types reuse the R_ARM_THM_CALL implementation unchanged.)
class R_ARM_THM_JUMP24(R_ARM_THM_CALL):
    pass


class R_ARM_THM_JUMP19(R_ARM_THM_CALL):
    pass


class R_ARM_THM_JUMP6(R_ARM_THM_CALL):
    pass
class R_ARM_THM_MOVW_ABS_NC(ELFReloc):
    """
    ((S + A) | T) & 0xffff

    Rewrites the 16-bit immediate of a Thumb-2 MOVW. With the first halfword in inst[31:16]
    and the second in inst[15:0], the immediate is split as imm16 = imm4:i:imm3:imm8 where
    imm4 = inst[19:16], i = inst[26], imm3 = inst[14:12] and imm8 = inst[7:0].

    Ref: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
    """

    @property
    def value(self):
        """
        :return: The relocated instruction as a 32-bit word laid out for a little-endian store.
        """
        insn_bytes = self.owner.memory.load(self.relative_addr, 4)
        hi = (insn_bytes[1] << 8) | insn_bytes[0]
        lo = (insn_bytes[3] << 8) | insn_bytes[2]
        inst = (hi << 16) | lo

        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to

        # initial addend is formed by interpreting the 16-bit literal field
        # of the instruction as a signed value.
        # The field order is imm4:i:imm3:imm8, matching the write-back below. (Decoding it as
        # i:imm4:imm3:imm8 scrambled the top five bits of any non-zero addend.)
        A = ((inst >> 16) & 0xF) << 12  # imm4 -> A[15:12]
        A |= ((inst >> 26) & 0x1) << 11  # i    -> A[11]
        A |= ((inst >> 12) & 0x7) << 8  # imm3 -> A[10:8]
        A |= inst & 0xFF  # imm8 -> A[7:0]
        if A & 0x8000:
            # two's complement
            A -= 0x10000
        T = _isThumbFunc(self.symbol, S)

        X = (S + A) | T
        MaskX = X & 0xFFFF

        # inst modification:
        part1 = MaskX >> 12  # [19:16]
        part2 = (MaskX >> 11) & 0x1  # [26]
        part3 = (MaskX >> 8) & 0x7  # [14:12]
        part4 = MaskX & 0xFF  # [7:0]
        inst &= 0b1111_1011_1111_0000_1000_1111_0000_0000
        inst |= (part1 << 16) & 0b0000_0000_0000_1111_0000_0000_0000_0000
        inst |= (part2 << 26) & 0b0000_0100_0000_0000_0000_0000_0000_0000
        inst |= (part3 << 12) & 0b0000_0000_0000_0000_0111_0000_0000_0000
        inst |= (part4 << 0) & 0b0000_0000_0000_0000_0000_0000_1111_1111

        # back to <little endian short> <little endian short> byte order
        raw = ((inst & 0x00FF0000) >> 16, (inst & 0xFF000000) >> 24, (inst & 0x00FF), (inst & 0xFF00) >> 8)
        inst = (raw[3] << 24) | (raw[2] << 16) | (raw[1] << 8) | raw[0]
        l.debug("%s relocated as R_ARM_THM_MOVW_ABS_NC to: 0x%x", self.symbol.name, inst)
        return inst
class R_ARM_THM_MOVT_ABS(ELFReloc):
    """
    (S + A) & 0xffff0000

    Rewrites the 16-bit immediate of a Thumb-2 MOVT with the upper half of the result. With
    the first halfword in inst[31:16] and the second in inst[15:0], the immediate is split as
    imm16 = imm4:i:imm3:imm8 where imm4 = inst[19:16], i = inst[26], imm3 = inst[14:12] and
    imm8 = inst[7:0].

    Ref: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
    """

    @property
    def value(self):
        """
        :return: The relocated instruction as a 32-bit word laid out for a little-endian store.
        """
        insn_bytes = self.owner.memory.load(self.relative_addr, 4)
        hi = (insn_bytes[1] << 8) | insn_bytes[0]
        lo = (insn_bytes[3] << 8) | insn_bytes[2]
        inst = (hi << 16) | lo

        S = self.resolvedby.rebased_addr  # The symbol's "value", where it points to

        # initial addend is formed by interpreting the 16-bit literal field
        # of the instruction as a signed value.
        # The field order is imm4:i:imm3:imm8, matching the write-back below. (Decoding it as
        # i:imm4:imm3:imm8 scrambled the top five bits of any non-zero addend.)
        A = ((inst >> 16) & 0xF) << 12  # imm4 -> A[15:12]
        A |= ((inst >> 26) & 0x1) << 11  # i    -> A[11]
        A |= ((inst >> 12) & 0x7) << 8  # imm3 -> A[10:8]
        A |= inst & 0xFF  # imm8 -> A[7:0]
        if A & 0x8000:
            # two's complement
            A -= 0x10000

        X = S + A
        MaskX = X & 0xFFFF0000

        # inst modification:
        part1 = MaskX >> 28  # [19:16]
        part2 = (MaskX >> 27) & 0x1  # [26]
        part3 = (MaskX >> 24) & 0x7  # [14:12]
        part4 = (MaskX >> 16) & 0xFF  # [7:0]
        inst &= 0b1111_1011_1111_0000_1000_1111_0000_0000
        inst |= (part1 << 16) & 0b0000_0000_0000_1111_0000_0000_0000_0000
        inst |= (part2 << 26) & 0b0000_0100_0000_0000_0000_0000_0000_0000
        inst |= (part3 << 12) & 0b0000_0000_0000_0000_0111_0000_0000_0000
        inst |= (part4 << 0) & 0b0000_0000_0000_0000_0000_0000_1111_1111

        # back to <little endian short> <little endian short> byte order
        raw = ((inst & 0x00FF0000) >> 16, (inst & 0xFF000000) >> 24, (inst & 0x00FF), (inst & 0xFF00) >> 8)
        inst = (raw[3] << 24) | (raw[2] << 16) | (raw[1] << 8) | raw[0]
        l.debug("%s relocated as R_ARM_THM_MOVT_ABS to: 0x%x", self.symbol.name, inst)
        return inst

View File

@@ -0,0 +1,119 @@
import logging
from . import generic
from .elfreloc import ELFReloc
l = logging.getLogger(name=__name__)
# http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056b/IHI0056B_aaelf64.pdf
arch = "AARCH64"
# The AArch64 relocation types below add no logic of their own: each one is bound to the
# generic implementation named in its base-class list.
class R_AARCH64_ABS64(generic.GenericAbsoluteAddendReloc):
    pass


class R_AARCH64_COPY(generic.GenericCopyReloc):
    pass


# GLOB_DAT and JUMP_SLOT share the same generic jump-slot implementation.
class R_AARCH64_GLOB_DAT(generic.GenericJumpslotReloc):
    pass


class R_AARCH64_JUMP_SLOT(generic.GenericJumpslotReloc):
    pass


class R_AARCH64_RELATIVE(generic.GenericRelativeReloc):
    pass


class R_AARCH64_IRELATIVE(generic.GenericIRelativeReloc):
    pass


class R_AARCH64_TLS_DTPREL(generic.GenericTLSDoffsetReloc):
    pass


class R_AARCH64_TLS_DTPMOD(generic.GenericTLSModIdReloc):
    pass


class R_AARCH64_TLS_TPREL(generic.GenericTLSOffsetReloc):
    pass


class R_AARCH64_TLSDESC(generic.GenericTLSDescriptorReloc):
    # Fixed resolver address for this architecture; how it is consumed is defined by
    # generic.GenericTLSDescriptorReloc (not visible here).
    RESOLVER_ADDR = 0xFFFF_FFFF_FFFF_FE00
class R_AARCH64_CALL26(ELFReloc):
    """
    Relocation Type: 283
    Calculation: (S + A - P)

    The result is stored, shifted right by two, in the low 26 bits of the instruction.
    """

    @property
    def value(self):
        target = self.resolvedby.rebased_addr + self.addend  # S + A
        return target - self.rebased_addr  # ... - P

    def relocate(self):
        """
        Patch the 32-bit instruction in place.

        :return: False if the relocation is unresolved, True once the instruction is written.
                 An out-of-range offset is only warned about, not rejected.
        """
        if not self.resolved:
            return False

        offset = self.value
        if not -(2**27) <= offset < 2**27:
            l.warning("relocation out of range")

        memory = self.owner.memory
        opcode = memory.unpack_word(self.relative_addr, size=4) & 0xFC000000  # keep the top 6 bits
        imm26 = (offset >> 2) & 0x3FFFFFF
        memory.pack_word(self.relative_addr, opcode | imm26, size=4)
        return True
class R_AARCH64_ADR_PREL_PG_HI21(ELFReloc):
    """
    Relocation Type: 275
    Calculation: Page(S + A) - Page(P)

    Page(x) clears the low 12 bits of x. The page delta, shifted right by 12, is stored as a
    21-bit immediate split into immlo (instruction bits 30:29) and immhi (bits 23:5).
    """

    @property
    def value(self):
        page_mask = ~0xFFF
        target_page = (self.resolvedby.rebased_addr + self.addend) & page_mask
        place_page = self.rebased_addr & page_mask
        return target_page - place_page

    def relocate(self):
        """
        Patch the 32-bit instruction in place.

        :return: False if the relocation is unresolved, True once the instruction is written.
                 An out-of-range delta is only warned about, not rejected.
        """
        if not self.resolved:
            return False

        delta = self.value
        if not -(2**32) <= delta < 2**32:
            l.warning("relocation out of range")

        memory = self.owner.memory
        opcode = memory.unpack_word(self.relative_addr, size=4) & 0x9F00001F  # clear immlo/immhi
        imm21 = (delta >> 12) & 0x1FFFFF
        immlo = imm21 & 0b11
        immhi = imm21 >> 2
        memory.pack_word(self.relative_addr, opcode | (immhi << 5) | (immlo << 29), size=4)
        return True
class R_AARCH64_ADD_ABS_LO12_NC(ELFReloc):
    """
    Relocation Type: 277
    Calculation: (S + A)

    The low 12 bits of the result are written into bits 10..21 of the 32-bit word at the
    relocation address. No range check is performed.
    """

    @property
    def value(self):
        """Return S + A, the rebased target address plus the addend."""
        A = self.addend
        S = self.resolvedby.rebased_addr
        return S + A

    def relocate(self):
        """
        Patch the 12-bit immediate in place.

        :return: False when the relocation is unresolved, True once the word has been rewritten.
        """
        if not self.resolved:
            return False
        # Keep every bit of the existing word except the 12-bit immediate field (bits 10..21).
        instr = self.owner.memory.unpack_word(self.relative_addr, size=4) & 0b11111111110000000000001111111111
        imm = self.value & 0xFFF
        self.owner.memory.pack_word(self.relative_addr, instr | (imm << 10), size=4)
        return True

View File

@@ -0,0 +1,3 @@
from .arm import * # pylint: disable=wildcard-import,unused-wildcard-import
arch = "ARMCortexM"

View File

@@ -0,0 +1,3 @@
from .arm import * # pylint: disable=wildcard-import,unused-wildcard-import
arch = "ARMEL"

View File

@@ -0,0 +1,3 @@
from .arm import * # pylint: disable=wildcard-import,unused-wildcard-import
arch = "ARMHF"

View File

@@ -0,0 +1,27 @@
import logging
from ...relocation import Relocation
l = logging.getLogger(name=__name__)
class ELFReloc(Relocation):
    """
    Base class for ELF relocations.

    When an explicit addend is supplied the relocation is flagged as RELA (``is_rela``);
    otherwise the addend is read from the owner's memory at the relocation address.
    """

    def __init__(self, owner, symbol, relative_addr, addend=None):
        """
        :param owner:           Object this relocation belongs to (forwarded to ``Relocation``).
        :param symbol:          Symbol the relocation refers to (forwarded to ``Relocation``).
        :param relative_addr:   Address of the relocation (forwarded to ``Relocation``).
        :param addend:          Explicit addend, or None to read it from memory.
        """
        super().__init__(owner, symbol, relative_addr)

        self.is_rela = addend is not None
        if self.is_rela:
            self._addend = addend
        else:
            # Implicit addend: whatever word currently sits at the relocation address.
            self._addend = self.owner.memory.unpack_word(self.relative_addr)

    @property
    def addend(self):
        """The relocation addend; re-read from memory if the cached value is None."""
        cached = self._addend
        if cached is None:
            cached = self._addend = self.owner.memory.unpack_word(self.relative_addr)
        return cached

    @property
    def value(self):  # pylint: disable=no-self-use
        """Placeholder that logs an error and yields 0; subclasses provide the real computation."""
        l.error("Value property of Relocation must be overridden by subclass!")
        return 0

View File

@@ -0,0 +1,207 @@
import logging
from ....address_translator import AT
from ....errors import CLEOperationError, CLEInvalidBinaryError
from ... import SymbolType
from .elfreloc import ELFReloc
l = logging.getLogger(name=__name__)
class GenericTLSDoffsetReloc(ELFReloc):
    """TLS relocation whose value is the addend plus the symbol's relative address."""

    @property
    def value(self):
        """Return ``addend + symbol.relative_addr``."""
        addend = self.addend
        symbol_offset = self.symbol.relative_addr
        return addend + symbol_offset

    def resolve_symbol(self, solist, **kwargs):  # pylint: disable=unused-argument
        """Resolve with no symbol (``resolve(None)``), ignoring ``solist``; always returns True."""
        self.resolve(None)
        return True
class GenericTLSOffsetReloc(ELFReloc):
    """
    TLS relocation that writes
    ``tls_block_offset + addend + symbol offset - tp_offset`` at the relocation address.
    """

    AUTO_HANDLE_NONE = True

    def relocate(self):
        """
        Write the TLS offset word.

        :raises CLEInvalidBinaryError: if the object providing the TLS block has no ``tls_block_offset``.
        """
        tp_offset = self.owner.arch.elf_tls.tp_offset

        resolved = self.resolvedby
        if resolved is None:
            # Nothing resolved: use this object's own TLS block with a zero symbol offset.
            tls_owner, symbol_offset = self.owner, 0
        else:
            tls_owner, symbol_offset = resolved.owner, resolved.relative_addr

        block_offset = tls_owner.tls_block_offset
        if block_offset is None:
            raise CLEInvalidBinaryError("Illegal relocation - dynamically loaded object using static TLS")

        self.owner.memory.pack_word(self.relative_addr, block_offset + self.addend + symbol_offset - tp_offset)
class GenericTLSDescriptorReloc(ELFReloc):
    """
    Best-effort handling of "TLS descriptors".

    Going VERY far out on a limb here: these show up in aarch64 binaries, which seem to want a
    pointer to a resolver function followed by some arbitrary data. The resolver is passed a
    pointer to the descriptor and is presumably meant to act like _tls_get_addr while also
    working for dynamically loaded objects.
    """

    # Subclasses supply the resolver address written into the first descriptor word.
    RESOLVER_ADDR = NotImplemented
    AUTO_HANDLE_NONE = True

    def relocate(self):
        """
        Write the two-word descriptor: the resolver address, then the TLS offset of the symbol.

        :raises CLEInvalidBinaryError: if the object providing the TLS block has no ``tls_block_offset``.
        """
        tls_owner = self.owner if self.resolvedby is None else self.resolvedby.owner
        if tls_owner.tls_block_offset is None:
            raise CLEInvalidBinaryError("Illegal relocation? - dynamically loaded object using static TLS? Maybe?")

        memory = self.owner.memory
        descriptor = self.relative_addr
        memory.pack_word(descriptor, self.RESOLVER_ADDR)
        # Should this include the hell offset?
        payload = tls_owner.tls_block_offset + self.addend + self.symbol.relative_addr
        memory.pack_word(descriptor + self.arch.bytes, payload)
class GenericTLSModIdReloc(ELFReloc):
    """TLS relocation that stores a ``tls_module_id`` at the relocation address."""

    AUTO_HANDLE_NONE = True

    def relocate(self):
        """Write the module id of this object (typeless symbol) or of the resolving symbol's owner."""
        untyped = self.symbol.type == SymbolType.TYPE_NONE
        module = self.owner if untyped else self.resolvedby.owner
        self.owner.memory.pack_word(self.relative_addr, module.tls_module_id)
class GenericIRelativeReloc(ELFReloc):
    """Relocation that records a ``(resolver address, relocation address)`` pair on the owner."""

    AUTO_HANDLE_NONE = True

    def relocate(self):
        """Append the pair to ``owner.irelatives``; nothing is written to memory here."""
        if self.symbol.type == SymbolType.TYPE_NONE:
            # No typed symbol: the addend is a linked address, translated to a mapped address.
            resolver = AT.from_lva(self.addend, self.owner).to_mva()
        else:
            resolver = self.resolvedby.rebased_addr
        self.owner.irelatives.append((resolver, self.relative_addr))
class GenericAbsoluteAddendReloc(ELFReloc):
    """Relocation whose value is the resolved symbol's rebased address plus the addend."""

    @property
    def value(self):
        """Return S + A."""
        target = self.resolvedby.rebased_addr
        return target + self.addend
class GenericPCRelativeAddendReloc(ELFReloc):
    """Relocation whose value is S + A - P, i.e. relative to the relocation's own rebased address."""

    @property
    def value(self):
        """Return S + A - P."""
        target = self.resolvedby.rebased_addr + self.addend
        return target - self.rebased_addr
class GenericJumpslotReloc(ELFReloc):
    """Relocation whose value is the resolved symbol's address; the addend is added only for RELA entries."""

    @property
    def value(self):
        """Return S + A for RELA relocations and plain S otherwise."""
        if not self.is_rela:
            return self.resolvedby.rebased_addr
        return self.resolvedby.rebased_addr + self.addend
class GenericRelativeReloc(ELFReloc):
    """Relocation relative to the owner's mapped base, unless a resolving symbol is present."""

    AUTO_HANDLE_NONE = True

    @property
    def value(self):
        """Return the resolving symbol's rebased address if there is one, else ``mapped_base + addend``."""
        resolved = self.resolvedby
        if resolved is None:
            return self.owner.mapped_base + self.addend
        return resolved.rebased_addr
class GenericAbsoluteReloc(ELFReloc):
    """Relocation whose value is exactly the resolved symbol's rebased address (addend ignored)."""

    @property
    def value(self):
        """Return S."""
        resolved = self.resolvedby
        return resolved.rebased_addr
class GenericCopyReloc(ELFReloc):
    """Relocation that copies the resolving symbol's bytes into the owner's memory."""

    def resolve_symbol(self, solist, **kwargs):
        """Resolve as usual, but with the owning object removed from the list of candidates."""
        others = [candidate for candidate in solist if candidate is not self.owner]
        super().resolve_symbol(others, **kwargs)

    def relocate(self):
        """
        Copy ``resolvedby.size`` bytes from the exporting object to the relocation address.

        A size mismatch is tolerated only when the exporter is an extern symbol of size 0;
        any other mismatch is logged and nothing is copied. Always returns True.
        """
        source = self.resolvedby
        if source.size == self.symbol.size or (source.size == 0 and source.is_extern):
            payload = source.owner.memory.load(source.relative_addr, source.size)
            self.owner.memory.store(self.relative_addr, payload)
        else:
            l.error("Export symbol is different size than import symbol for copy relocation: %s", self.symbol.name)
        return True
class MipsGlobalReloc(GenericAbsoluteReloc):
    """MIPS global relocation; identical to the generic absolute relocation."""
class MipsLocalReloc(ELFReloc):
    """MIPS local relocation: shifts the stored word by the distance the object was moved."""

    AUTO_HANDLE_NONE = True

    def resolve_symbol(self, solist, **kwargs):
        """Resolve with no symbol (``resolve(None)``); ``solist`` is ignored."""
        self.resolve(None)

    def relocate(self):
        """Add ``mapped_base - DT_MIPS_BASE_ADDRESS`` to the word at the relocation address."""
        base = self.owner.mapped_base
        if base == 0:
            return  # don't touch local relocations on the main bin

        delta = base - self.owner._dynamic["DT_MIPS_BASE_ADDRESS"]
        if delta == 0:
            return

        memory = self.owner.memory
        current = memory.unpack_word(self.relative_addr)
        memory.pack_word(self.relative_addr, current + delta)
class RelocTruncate32Mixin:
    """
    A mix-in class for relocations that cover a 32-bit field regardless of the architecture's address word length.

    The relocation value is first reduced to the architecture's native width, optionally checked for
    loss-free truncation, and then written as an unsigned 32-bit word at ``dest_addr``.
    """

    # If True, 32-bit truncated value must equal to its original when zero-extended
    check_zero_extend = False

    # If True, 32-bit truncated value must equal to its original when sign-extended
    check_sign_extend = False

    def relocate(self):
        """
        Write the truncated value.

        :return: True once the word has been written.
        :raises CLEOperationError: if an enabled extension check shows that truncation would lose bits.
        """
        arch_bits = self.owner.arch.bits
        assert arch_bits >= 32  # 16-bit makes no sense here

        val = self.value % (2**arch_bits)  # we must truncate it to native range first
        upper = val >> 32

        # Zero-extension round-trips only when every bit above bit 31 is clear.
        zero_extend_lossy = upper != 0
        # Sign-extension round-trips only when the upper bits replicate bit 31.
        expected_upper = (1 << (arch_bits - 32)) - 1 if (val >> 31) & 1 else 0
        sign_extend_lossy = upper != expected_upper

        # Each check is evaluated on its own; the conditional above applies only to the
        # expected sign fill, not to the whole test.
        if (self.check_zero_extend and zero_extend_lossy) or (self.check_sign_extend and sign_extend_lossy):
            raise CLEOperationError(
                "relocation truncated to fit: %s; consider making"
                " relevant addresses fit in the 32-bit address space." % self.__class__.__name__
            )

        self.owner.memory.pack_word(self.dest_addr, val, size=4, signed=False)
        return True
class RelocGOTMixin:
    """
    A mix-in class which will cause the symbol to be resolved to a pointer to the symbol instead of the symbol
    """

    def resolve(self, symbol, extern_object=None, **kwargs):
        """Create a ``got.<name>`` extern object symbol pointing at ``symbol`` and resolve to that instead."""
        assert extern_object is not None, "I have no idea how this would happen"

        slot_name = "got.%s" % symbol.name
        slot = extern_object.make_extern(slot_name, sym_type=SymbolType.TYPE_OBJECT, point_to=symbol)
        super().resolve(slot)

View File

@@ -0,0 +1,45 @@
import logging
from . import generic
l = logging.getLogger(name=__name__)
arch = "X86"
class R_386_32(generic.GenericAbsoluteAddendReloc):
    """i386 R_386_32; all behaviour is inherited from the generic absolute-plus-addend relocation."""


class R_386_PC32(generic.GenericPCRelativeAddendReloc):
    """i386 R_386_PC32; all behaviour is inherited from the generic PC-relative relocation."""


class R_386_COPY(generic.GenericCopyReloc):
    """i386 R_386_COPY; all behaviour is inherited from the generic copy relocation."""


class R_386_GLOB_DAT(generic.GenericJumpslotReloc):
    """i386 R_386_GLOB_DAT; all behaviour is inherited from the generic jump-slot relocation."""


class R_386_JMP_SLOT(generic.GenericJumpslotReloc):
    """i386 R_386_JMP_SLOT; all behaviour is inherited from the generic jump-slot relocation."""


class R_386_RELATIVE(generic.GenericRelativeReloc):
    """i386 R_386_RELATIVE; all behaviour is inherited from the generic relative relocation."""


class R_386_IRELATIVE(generic.GenericIRelativeReloc):
    """i386 R_386_IRELATIVE; all behaviour is inherited from the generic IRELATIVE relocation."""


class R_386_TLS_DTPMOD32(generic.GenericTLSModIdReloc):
    """i386 R_386_TLS_DTPMOD32; all behaviour is inherited from the generic TLS module-id relocation."""


class R_386_TLS_TPOFF(generic.GenericTLSOffsetReloc):
    """i386 R_386_TLS_TPOFF; all behaviour is inherited from the generic TLS offset relocation."""


class R_386_TLS_DTPOFF32(generic.GenericTLSDoffsetReloc):
    """i386 R_386_TLS_DTPOFF32; all behaviour is inherited from the generic TLS data-offset relocation."""

View File

@@ -0,0 +1,49 @@
from . import generic
arch = "MIPS32"
class R_MIPS_32(generic.GenericAbsoluteAddendReloc):
    """MIPS32 R_MIPS_32; all behaviour is inherited from the generic absolute-plus-addend relocation."""


class R_MIPS_REL32(generic.GenericRelativeReloc):
    """MIPS32 R_MIPS_REL32; all behaviour is inherited from the generic relative relocation."""


class R_MIPS_JUMP_SLOT(generic.GenericAbsoluteReloc):
    """MIPS32 R_MIPS_JUMP_SLOT; all behaviour is inherited from the generic absolute relocation."""


class R_MIPS_GLOB_DAT(generic.GenericAbsoluteReloc):
    """MIPS32 R_MIPS_GLOB_DAT; all behaviour is inherited from the generic absolute relocation."""


class R_MIPS_TLS_DTPMOD32(generic.GenericTLSModIdReloc):
    """MIPS32 R_MIPS_TLS_DTPMOD32; all behaviour is inherited from the generic TLS module-id relocation."""


class R_MIPS_TLS_TPREL32(generic.GenericTLSOffsetReloc):
    """MIPS32 R_MIPS_TLS_TPREL32; all behaviour is inherited from the generic TLS offset relocation."""


class R_MIPS_TLS_DTPREL32(generic.GenericTLSDoffsetReloc):
    """MIPS32 R_MIPS_TLS_DTPREL32; all behaviour is inherited from the generic TLS data-offset relocation."""
class R_MIPS_HI16(generic.GenericAbsoluteReloc):
    """Writes the relocation value shifted right by 16 as a 2-byte word at ``dest_addr``."""

    def relocate(self):
        """Return False if unresolved; otherwise store ``value >> 16`` (size 2) and return True."""
        if self.resolved:
            upper_half = self.value >> 16
            self.owner.memory.pack_word(self.dest_addr, upper_half, size=2)
            return True
        return False
class R_MIPS_LO16(generic.GenericAbsoluteReloc):
    """Writes the low 16 bits of the relocation value as a 2-byte word at ``dest_addr``."""

    def relocate(self):
        """Return False if unresolved; otherwise store ``value & 0xFFFF`` (size 2) and return True."""
        if self.resolved:
            lower_half = self.value & 0xFFFF
            self.owner.memory.pack_word(self.dest_addr, lower_half, size=2)
            return True
        return False

View File

@@ -0,0 +1,27 @@
from . import generic
arch = "MIPS64"
class R_MIPS_64(generic.GenericAbsoluteAddendReloc):
    """MIPS64 R_MIPS_64; all behaviour is inherited from the generic absolute-plus-addend relocation."""


class R_MIPS_REL32(generic.GenericRelativeReloc):
    """MIPS64 R_MIPS_REL32; all behaviour is inherited from the generic relative relocation."""


class R_MIPS_COPY(generic.GenericCopyReloc):
    """MIPS64 R_MIPS_COPY; all behaviour is inherited from the generic copy relocation."""


class R_MIPS_TLS_DTPMOD64(generic.GenericTLSModIdReloc):
    """MIPS64 R_MIPS_TLS_DTPMOD64; all behaviour is inherited from the generic TLS module-id relocation."""


class R_MIPS_TLS_DTPREL64(generic.GenericTLSDoffsetReloc):
    """MIPS64 R_MIPS_TLS_DTPREL64; all behaviour is inherited from the generic TLS data-offset relocation."""


class R_MIPS_TLS_TPREL64(generic.GenericTLSOffsetReloc):
    """MIPS64 R_MIPS_TLS_TPREL64; all behaviour is inherited from the generic TLS offset relocation."""

View File

@@ -0,0 +1,163 @@
import logging
from . import generic
from .elfreloc import ELFReloc
l = logging.getLogger(name=__name__)
# http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.pdf
arch = "PPC64"
class R_PPC64_JMP_SLOT(ELFReloc):
    """
    PPC64 jump slot.

    For ABIv1 objects three consecutive words (at offsets 0, 8 and 16) are copied from the
    resolving symbol's location; otherwise the symbol's rebased address is stored.
    """

    def relocate(self):
        """Fill the slot and return True."""
        if not self.owner.is_ppc64_abiv1:
            self.owner.memory.pack_word(self.relative_addr, self.resolvedby.rebased_addr)
            return True

        # R_PPC64_JMP_SLOT
        # http://osxr.org/glibc/source/sysdeps/powerpc/powerpc64/dl-machine.h?v=glibc-2.15#0405
        # copy an entire function descriptor struct (addr, toc, aux)
        field_offsets = (0, 8, 16)
        source_memory = self.resolvedby.owner.memory
        source_addr = self.resolvedby.relative_addr
        # Read every field before writing any of them, as the original sequence does.
        fields = [source_memory.unpack_word(source_addr + offset) for offset in field_offsets]

        target_memory = self.owner.memory
        for offset, word in zip(field_offsets, fields):
            target_memory.pack_word(self.relative_addr + offset, word)
        return True
class R_PPC64_RELATIVE(generic.GenericRelativeReloc):
    """PPC64 RELATIVE; all behaviour is inherited from the generic relative relocation."""


class R_PPC64_IRELATIVE(generic.GenericIRelativeReloc):
    """PPC64 IRELATIVE; all behaviour is inherited from the generic IRELATIVE relocation."""


class R_PPC64_ADDR64(generic.GenericAbsoluteAddendReloc):
    """PPC64 ADDR64; all behaviour is inherited from the generic absolute-plus-addend relocation."""


class R_PPC64_GLOB_DAT(generic.GenericJumpslotReloc):
    """PPC64 GLOB_DAT; all behaviour is inherited from the generic jump-slot relocation."""


class R_PPC64_DTPMOD64(generic.GenericTLSModIdReloc):
    """PPC64 DTPMOD64; all behaviour is inherited from the generic TLS module-id relocation."""


class R_PPC64_DTPREL64(generic.GenericTLSDoffsetReloc):
    """PPC64 DTPREL64; all behaviour is inherited from the generic TLS data-offset relocation."""


class R_PPC64_TPREL64(generic.GenericTLSOffsetReloc):
    """PPC64 TPREL64; all behaviour is inherited from the generic TLS offset relocation."""
class R_PPC64_REL24(ELFReloc):
    """
    Relocation Type: 10
    Calculation: (S + A - P) >> 2
    Field: low24*
    """

    @property
    def value(self):
        """Return (S + A - P) >> 2."""
        displacement = self.resolvedby.rebased_addr + self.addend - self.rebased_addr
        return displacement >> 2

    def relocate(self):
        """
        Insert the low 24 bits of the value into bits 2..25 of the 32-bit word at the relocation address.

        :return: False when the relocation is unresolved, True once the word has been rewritten.
        """
        if not self.resolved:
            return False

        memory = self.owner.memory
        # Preserve the top 6 and bottom 2 bits of the existing word.
        kept_bits = memory.unpack_word(self.relative_addr, size=4) & 0xFC000003
        imm24 = self.value & 0xFFFFFF
        memory.pack_word(self.relative_addr, kept_bits | (imm24 << 2), size=4)
        return True
class R_PPC64_TOC16_LO(ELFReloc):
    """
    Relocation Type: 48
    Calculation: #lo(S + A - .TOC.)
    Field: half16
    """

    @property
    def value(self):
        """Return the low 16 bits of S + A - .TOC.; .TOC. is treated as 0 (with a warning) when unknown."""
        total = self.resolvedby.rebased_addr + self.addend
        toc = self.owner.ppc64_initial_rtoc
        if toc is None:
            l.warning(".TOC. value not found")
            toc = 0
        return (total - toc) & 0xFFFF

    def relocate(self):
        """Return False if unresolved; otherwise store the value as a 2-byte word and return True."""
        if self.resolved:
            self.owner.memory.pack_word(self.relative_addr, self.value, size=2)
            return True
        return False
class R_PPC64_TOC16_HI(ELFReloc):
    """
    Relocation Type: 49
    Calculation: #hi(S + A - .TOC.)
    Field: half16
    """

    @property
    def value(self):
        """Return bits 16..31 of S + A - .TOC.; .TOC. is treated as 0 (with a warning) when unknown."""
        total = self.resolvedby.rebased_addr + self.addend
        toc = self.owner.ppc64_initial_rtoc
        if toc is None:
            l.warning(".TOC. value not found")
            toc = 0
        return ((total - toc) >> 16) & 0xFFFF

    def relocate(self):
        """Return False if unresolved; otherwise store the value as a 2-byte word and return True."""
        if self.resolved:
            self.owner.memory.pack_word(self.relative_addr, self.value, size=2)
            return True
        return False
class R_PPC64_TOC16_HA(ELFReloc):
    """
    Relocation Type: 50
    Calculation: #ha(S + A - .TOC.)
    Field: half16
    """

    @property
    def value(self):
        """
        Return the adjusted high 16 bits of S + A - .TOC.: bits 16..31 plus one when bit 15 is set.
        .TOC. is treated as 0 (with a warning) when unknown.
        """
        total = self.resolvedby.rebased_addr + self.addend
        toc = self.owner.ppc64_initial_rtoc
        if toc is None:
            l.warning(".TOC. value not found")
            toc = 0
        delta = total - toc
        carry = 1 if delta & 0x8000 else 0
        return ((delta >> 16) + carry) & 0xFFFF

    def relocate(self):
        """Return False if unresolved; otherwise store the value as a 2-byte word and return True."""
        if self.resolved:
            self.owner.memory.pack_word(self.relative_addr, self.value, size=2)
            return True
        return False
class R_PPC64_TOC(ELFReloc):
    """
    Relocation Type: 51
    Calculation: .TOC.
    Field: doubleword64
    """

    @property
    def value(self):
        """Return the owner's initial TOC pointer, or 0 (with a warning) when it is unknown."""
        toc = self.owner.ppc64_initial_rtoc
        if toc is not None:
            return toc
        l.warning(".TOC. value not found")
        return 0

View File

@@ -0,0 +1,422 @@
import logging
from . import generic
from .elfreloc import ELFReloc
l = logging.getLogger(name=__name__)
arch = "PPC32"
# Reference: System V Application Binary Interface, PowerPC Processor Supplement
# http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf
# PPC constants/masks to be used in relocations
PPC_WORD32 = 0xFFFFFFFF
PPC_WORD30 = 0xFFFFFFFC
PPC_LOW24 = 0x03FFFFFC
PPC_LOW14 = 0x0020FFFC
PPC_HALF16 = 0xFFFF
PPC_BL_INST = 0x48000001
class R_PPC_ADDR32(generic.GenericAbsoluteAddendReloc):
    """PPC32 ADDR32; all behaviour is inherited from the generic absolute-plus-addend relocation."""
class R_PPC_ADDR24(ELFReloc):
    """
    Relocation Type: 0x2
    Calculation: (S + A) >> 2
    Field: low24*
    """

    @property
    def value(self):
        """Return (S + A) >> 2."""
        return (self.resolvedby.rebased_addr + self.addend) >> 2
class R_PPC_ADDR16(ELFReloc):
    """
    Relocation Type: 0x3
    Calculation: S+A
    Field: half16*
    """

    @property
    def value(self):
        """Return S + A without masking."""
        return self.resolvedby.rebased_addr + self.addend
class R_PPC_ADDR16_LO(ELFReloc):  # pylint: disable=undefined-variable
    """
    Relocation Type: 0x4
    Calculation: #lo(S + A)
    Field: half16
    """

    @property
    def value(self):
        """Return S + A masked with PPC_HALF16."""
        return (self.resolvedby.rebased_addr + self.addend) & PPC_HALF16

    def relocate(self):
        """Return False if unresolved; otherwise store the value as a 2-byte word and return True."""
        if self.resolved:
            self.owner.memory.pack_word(self.relative_addr, self.value, size=2)
            return True
        return False
class R_PPC_ADDR16_HI(ELFReloc):
    """
    Relocation Type: 0x5
    Calculation: #hi(S + A)
    Field: half16
    """

    @property
    def value(self):
        """Return (S + A) >> 16 masked with PPC_HALF16."""
        upper = (self.resolvedby.rebased_addr + self.addend) >> 16
        return upper & PPC_HALF16

    def relocate(self):
        """Return False if unresolved; otherwise store the value as a 2-byte word and return True."""
        if self.resolved:
            self.owner.memory.pack_word(self.relative_addr, self.value, size=2)
            return True
        return False
class R_PPC_ADDR16_HA(ELFReloc):  # pylint: disable=undefined-variable
    """
    Relocation Type: 0x6
    Calculation: #ha(S + A)
    Field: half16
    """

    @property
    def value(self):
        """Return bits 16..31 of S + A, plus one when bit 15 is set, masked with PPC_HALF16."""
        total = self.resolvedby.rebased_addr + self.addend
        carry = 1 if total & 0x8000 else 0
        return ((total >> 16) + carry) & PPC_HALF16

    def relocate(self):
        """Return False if unresolved; otherwise store the value as a 2-byte word and return True."""
        if self.resolved:
            self.owner.memory.pack_word(self.relative_addr, self.value, size=2)
            return True
        return False
class R_PPC_ADDR14(ELFReloc):
    """
    Relocation Type: 0x7
    Calculation: (S + A) >> 2
    Field: low14*
    """

    @property
    def value(self):
        """Return (S + A) >> 2."""
        return (self.resolvedby.rebased_addr + self.addend) >> 2
class R_PPC_ADDR14_BRTAKEN(ELFReloc):
    """
    Relocation Type: 0x8
    Calculation: (S + A) >> 2
    Field: low14*
    """

    @property
    def value(self):
        """Return (S + A) >> 2."""
        return (self.resolvedby.rebased_addr + self.addend) >> 2
class R_PPC_ADDR14_BRNTAKEN(ELFReloc):
    """
    Relocation Type: 0x9
    Calculation: (S + A) >> 2
    Field: low14*
    """

    @property
    def value(self):
        """Return (S + A) >> 2."""
        return (self.resolvedby.rebased_addr + self.addend) >> 2
class R_PPC_REL24(ELFReloc):  # pylint: disable=undefined-variable
    """
    Relocation Type: 0xa
    Calculation: (S + A - P) >> 2
    Field: low24*

    R_PPC_REL24 is special in that the instruction itself must be modified. It resolves
    branch-and-link instructions, which before relocation all carry the bytes 48 00 00 01
    and would therefore call their own address, looping forever. The offset computed here
    is folded into a correctly encoded branch-and-link instruction.
    """

    @property
    def value(self):
        """Return the branch-and-link word: addend bits outside PPC_LOW24, the offset field, and PPC_BL_INST."""
        addend = self.addend
        displacement = self.resolvedby.rebased_addr + addend - self.rebased_addr
        # Drop the two low bits, then confine the offset to the low24 field.
        offset_field = ((displacement >> 2) << 2) & PPC_LOW24
        preserved = addend & ~PPC_LOW24
        return preserved | offset_field | PPC_BL_INST
class R_PPC_REL14(ELFReloc):
    """
    Relocation Type: 0xb
    Calculation: (S + A - P) >> 2
    Field: low14*
    """

    @property
    def value(self):
        """Return the addend bits outside PPC_LOW14 combined with the offset confined to PPC_LOW14."""
        addend = self.addend
        displacement = self.resolvedby.rebased_addr + addend - self.rebased_addr
        offset_field = ((displacement >> 2) << 2) & PPC_LOW14
        return (addend & ~PPC_LOW14) | offset_field
class R_PPC_REL14_BRTAKEN(ELFReloc):
    """
    Relocation Type: 0xc
    Calculation: (S + A - P) >> 2
    Field: low14*
    """

    @property
    def value(self):
        """Return the addend bits outside PPC_LOW14 combined with the offset confined to PPC_LOW14."""
        addend = self.addend
        displacement = self.resolvedby.rebased_addr + addend - self.rebased_addr
        offset_field = ((displacement >> 2) << 2) & PPC_LOW14
        return (addend & ~PPC_LOW14) | offset_field
class R_PPC_REL14_BRNTAKEN(ELFReloc):
    """
    Relocation Type: 0xd
    Calculation: (S + A - P) >> 2
    Field: low14*
    """

    @property
    def value(self):
        """Return the addend bits outside PPC_LOW14 combined with the offset confined to PPC_LOW14."""
        addend = self.addend
        displacement = self.resolvedby.rebased_addr + addend - self.rebased_addr
        offset_field = ((displacement >> 2) << 2) & PPC_LOW14
        return (addend & ~PPC_LOW14) | offset_field
class R_PPC_COPY(generic.GenericCopyReloc):
    """PPC32 COPY; all behaviour is inherited from the generic copy relocation."""


class R_PPC_GLOB_DAT(generic.GenericJumpslotReloc):
    """PPC32 GLOB_DAT; all behaviour is inherited from the generic jump-slot relocation."""
class R_PPC_JMP_SLOT(generic.GenericJumpslotReloc):
    """PPC32 jump slot: the generic jump-slot relocation plus a diagnostic for unsupported binaries."""

    def relocate(self):
        """Log an error when neither DT_PPC_GOT nor DT_LOPROC is in the dynamic table, then relocate as usual."""
        dynamic = self.owner._dynamic
        has_marker = "DT_PPC_GOT" in dynamic or "DT_LOPROC" in dynamic
        if not has_marker:
            l.error("This binary is relocated incorrectly. See https://github.com/angr/cle/issues/142 for details.")
        super().relocate()
class R_PPC_RELATIVE(generic.GenericRelativeReloc):
    """PPC32 RELATIVE; all behaviour is inherited from the generic relative relocation."""
class R_PPC_UADDR32(ELFReloc):
    """
    Relocation Type: 0x18
    Calculation: S + A
    Field: word32
    """

    @property
    def value(self):
        """Return S + A."""
        return self.resolvedby.rebased_addr + self.addend
class R_PPC_UADDR16(ELFReloc):
    """
    Relocation Type: 0x19
    Calculation: S + A
    Field: half16*
    """

    @property
    def value(self):
        """Return S + A without masking."""
        return self.resolvedby.rebased_addr + self.addend
class R_PPC_REL32(ELFReloc):  # pylint: disable=undefined-variable
    """
    Relocation Type: 0x1a
    Calculation: S + A - P
    Field: word32
    """

    @property
    def value(self):
        """Return S + A - P masked with PPC_WORD32."""
        displacement = self.resolvedby.rebased_addr + self.addend - self.rebased_addr
        return displacement & PPC_WORD32
class R_PPC_SECTOFF(ELFReloc):
    """
    Relocation Type: 0x21
    Calculation: R + A
    Field: half16*
    """

    @property
    def value(self):
        """Return R + A, where R is this relocation's ``relative_addr``."""
        return self.relative_addr + self.addend
class R_PPC_SECTOFF_LO(ELFReloc):
    """
    Relocation Type: 0x22
    Calculation: #lo(R + A)
    Field: half16
    """

    @property
    def value(self):
        """Return R + A masked with PPC_HALF16, where R is this relocation's ``relative_addr``."""
        return (self.relative_addr + self.addend) & PPC_HALF16
class R_PPC_SECTOFF_HI(ELFReloc):
    """
    Relocation Type: 0x23
    Calculation: #hi(R + A)
    Field: half16
    """

    @property
    def value(self):
        """Return (R + A) >> 16 masked with PPC_HALF16, where R is this relocation's ``relative_addr``."""
        upper = (self.relative_addr + self.addend) >> 16
        return upper & PPC_HALF16
class R_PPC_SECTOFF_HA(ELFReloc):
    """
    Relocation Type: 0x24
    Calculation: #ha(R + A)
    Field: half16
    """

    @property
    def value(self):
        """Return bits 16..31 of R + A, plus one when bit 15 is set, masked with PPC_HALF16."""
        total = self.relative_addr + self.addend
        carry = 1 if total & 0x8000 else 0
        return ((total >> 16) + carry) & PPC_HALF16
class R_PPC_ADDR30(ELFReloc):
    """
    Relocation Type: 0x25
    Calculation: (S + A - P) >> 2
    Field: word30
    """

    @property
    def value(self):
        """Return (S + A - P) >> 2."""
        displacement = self.resolvedby.rebased_addr + self.addend - self.rebased_addr
        return displacement >> 2
class R_PPC_DTPMOD32(generic.GenericTLSModIdReloc):
    """PPC32 DTPMOD32; all behaviour is inherited from the generic TLS module-id relocation."""


class R_PPC_DTPREL32(generic.GenericTLSDoffsetReloc):
    """PPC32 DTPREL32; all behaviour is inherited from the generic TLS data-offset relocation."""


class R_PPC_TPREL32(generic.GenericTLSOffsetReloc):
    """PPC32 TPREL32; all behaviour is inherited from the generic TLS offset relocation."""

View File

@@ -0,0 +1,31 @@
from . import generic
arch = "S390X"
class R_390_GLOB_DAT(generic.GenericJumpslotReloc):
    """S390X GLOB_DAT; all behaviour is inherited from the generic jump-slot relocation."""


class R_390_JMP_SLOT(generic.GenericJumpslotReloc):
    """S390X JMP_SLOT; all behaviour is inherited from the generic jump-slot relocation."""


class R_390_RELATIVE(generic.GenericRelativeReloc):
    """S390X RELATIVE; all behaviour is inherited from the generic relative relocation."""


class R_390_64(generic.GenericAbsoluteAddendReloc):
    """S390X R_390_64; all behaviour is inherited from the generic absolute-plus-addend relocation."""


class R_390_TLS_TPOFF(generic.GenericTLSOffsetReloc):
    """S390X TLS_TPOFF; all behaviour is inherited from the generic TLS offset relocation."""


class R_390_IRELATIVE(generic.GenericIRelativeReloc):
    """S390X IRELATIVE; all behaviour is inherited from the generic IRELATIVE relocation."""


class R_390_COPY(generic.GenericCopyReloc):
    """S390X COPY; all behaviour is inherited from the generic copy relocation."""

View File

@@ -0,0 +1,48 @@
from typing import List
from .variable import Variable
class LexicalBlock:
    """
    A lexical block is a sequence of source statements, e.g. a while/for
    loop or an if statement or some bracketed block.

    Corresponds to a DW_TAG_lexical_block in DWARF.

    :param low_pc:          The relative start address of the block
    :param high_pc:         The relative end address of the block

    :ivar low_pc:           The relative start address of the block
    :ivar high_pc:          The relative end address of the block
    :ivar child_blocks:     Lexical blocks inside this block (only direct children)
    :type child_blocks:     List[LexicalBlock]
    """

    def __init__(self, low_pc, high_pc) -> None:
        self.low_pc, self.high_pc = low_pc, high_pc
        # Every block starts out with its own, empty list of children.
        self.child_blocks: List[LexicalBlock] = list()
class Subprogram(LexicalBlock):
    """
    DW_TAG_subprogram for DWARF. The behavior is mostly inherited from
    LexicalBlock to avoid redundancy.

    :param name:            The name of the function/program
    :param low_pc:          The relative start address of the subprogram
    :param high_pc:         The relative end address of the subprogram

    :ivar name:             The name of the function/program
    :type name:             str
    :ivar subprogram:       Reference back to this subprogram itself
    :ivar local_variables:  All local variables in a Subprogram (they may reside in several child blocks)
    :type local_variables:  List[Variable]
    """

    def __init__(self, name, low_pc, high_pc) -> None:
        # A subprogram is its own enclosing subprogram; set before the base initializer runs.
        self.subprogram = self
        super().__init__(low_pc, high_pc)
        self.name = name
        self.local_variables: List[Variable] = list()

View File

@@ -0,0 +1,62 @@
from elftools.elf.enums import ENUM_ST_INFO_TYPE
from ..symbol import Symbol, SymbolType
from ...address_translator import AT
from .symbol_type import ELFSymbolType
def maybedecode(string):
    """
    Return ``string`` as text.

    :param string:  A ``str`` (or ``str`` subclass), returned unchanged, or a bytes-like
                    object exposing ``decode()``, which is decoded with the default codec.
    :return:        The textual form of ``string``.
    """
    # isinstance (rather than an exact type check) keeps str subclasses from being
    # sent down the decode() path, where they would fail.
    return string if isinstance(string, str) else string.decode()
class ELFSymbol(Symbol):
    """
    Represents a symbol for the ELF format.

    :ivar str binding:      The binding of this symbol as an ELF enum string
    :ivar section:          The section associated with this symbol, or None
    :ivar _subtype:         The ELFSymbolType of this symbol
    """

    def __init__(self, owner, symb):
        """
        :param owner:   The object this symbol belongs to.
        :param symb:    The parsed symbol entry (its ``entry`` and ``name`` are read here).
        """
        entry = symb.entry
        raw_type = entry.st_info.type
        subtype_num = ENUM_ST_INFO_TYPE.get(raw_type, raw_type)

        # Lookup order: the object's architecture, then "gnu" on UNIX-like systems, then the generic types.
        candidates = [owner.arch.name]
        if "UNIX" in owner.os:
            candidates.append("gnu")
        candidates.append(None)

        subtype = None
        for os_proc in candidates:
            try:
                subtype = ELFSymbolType((subtype_num, os_proc))
            except ValueError:
                continue
            break

        self._subtype = subtype
        self._type = SymbolType.TYPE_OTHER if subtype is None else subtype.to_base_type()

        sec_ndx, value = entry.st_shndx, entry.st_value

        # A relocatable object's symbol's value is relative to its section's addr.
        if owner.is_relocatable and isinstance(sec_ndx, int):
            value += owner.sections[sec_ndx].remap_offset

        relative = AT.from_lva(value, owner).to_rva()
        super().__init__(owner, maybedecode(symb.name), relative, entry.st_size, self.type)

        self.version = None
        self.binding = entry.st_info.bind
        self.is_hidden = entry["st_other"]["visibility"] == "STV_HIDDEN"
        # String section indices are special markers, not real sections.
        self.section = None if type(sec_ndx) is str else sec_ndx
        self.is_static = self._type == SymbolType.TYPE_SECTION or sec_ndx == "SHN_ABS"
        self.is_common = sec_ndx == "SHN_COMMON"
        self.is_weak = self.binding == "STB_WEAK"
        self.is_local = self.binding == "STB_LOCAL"

        externally_visible = ("STB_GLOBAL", "STB_WEAK")
        self.is_import = sec_ndx == "SHN_UNDEF" and self.binding in externally_visible
        self.is_export = (self.section is not None or self.is_common) and self.binding in externally_visible

    @property
    def subtype(self) -> ELFSymbolType:
        """The ELF-specific symbol type, or None when no matching ELFSymbolType exists."""
        return self._subtype

View File

@@ -0,0 +1,146 @@
import logging
from ..symbol import SymbolType, SymbolSubType
_l = logging.getLogger(name=__name__)
class ELFSymbolType(SymbolSubType):
    """
    ELF-specific symbol types

    Every member value is a tuple ``(elf_value, os_proc)``: the numeric ELF symbol type and
    the OS/processor tag it belongs to (None for the generic types).
    """

    # Enum classes cannot be inherited. Therefore, additional platform-specific
    # values should simply be added to this enumeration (e.g., STT_GNU_IFUNC)
    # with an appropriate conversion in `to_base_type()`.
    #
    # Though that could be solved with IntEnum as well, that breaks the
    # strong typing and is discouraged by Python docs.

    # Basic types
    STT_NOTYPE = (0, None)  # Symbol's type is not specified
    STT_OBJECT = (1, None)  # Symbol is a data object (variable, array, etc.)
    STT_FUNC = (2, None)  # Symbol is executable code (function, etc.)
    STT_SECTION = (3, None)  # Symbol refers to a section
    STT_FILE = (4, None)  # Local, absolute symbol that refers to a file
    STT_COMMON = (5, None)  # An uninitialized common block
    STT_TLS = (6, None)  # Thread local data object

    # ELF's generic place-holders
    STT_LOOS = (10, None)  # Lowest operating system-specific symbol type
    STT_HIOS = (12, None)  # Highest operating system-specific symbol type
    STT_LOPROC = (13, None)  # Lowest processor-specific symbol type
    STT_HIPROC = (15, None)  # Highest processor-specific symbol type

    #
    # OS- and processor-specific types. Note that the entire range
    # of values is used indiscriminantly for OS or processor.
    #
    # Try to use values that map to an `archinfo.Arch` so that `arch_from_id()`
    # is able to return a specific type. Otherwise, use something indicative
    # of its purpose.
    #

    # GNU indirect function
    #
    # HACK: It's GNU-specific, not OS-specific but GNU doesn't care. This
    # shouldn't be an issue unless someone tries analyzing an old ELF that
    # uses STT_LOOS for something else, before STT_GNU_IFUNC came about, in
    # which case angr will need a new SimOS variant anyway.
    STT_GNU_IFUNC = (STT_LOOS[0], "gnu")

    #
    # Below are examples of additional types that can be added. These are
    # commented out since they've never actually been used or tested.
    #

    # AMDGPU HSA
    #
    # https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/master/src/inc/amd_hsa_elf.h
    # TODO: Update the arch name here if this arch is ever supported
    # STT_AMDGPU_HSA_KERNEL = (STT_LOOS[0], 'amdgpu_hsa')
    # STT_AMDGPU_HSA_INDIRECT_FUNCTION = (STT_LOOS[0] + 1, 'amdgpu_hsa')
    # STT_AMDGPU_HSA_METADATA = (STT_LOOS[0] + 2, 'amdgpu_hsa')

    # HP Precision Architecture (PA-RISC)
    #
    # https://github.com/lattera/glibc/blob/master/elf/elf.h
    # TODO: Update the arch name here if this arch is ever supported
    # STT_HP_OPAQUE = (STT_LOOS[0] + 1, 'hppa')
    # STT_HP_STUB = (STT_LOOS[0] + 2, 'hppa')
    # STT_PARISC_MILLICODE = (STT_LOPROC[0], 'hppa')

    def __init__(self, *args):  # pylint: disable=unused-argument
        # Essentially a static type check, this will fail on import
        # if someone defines a type that's not a `tuple`
        if not isinstance(self.value, tuple):
            raise ValueError(
                f"Symbol value '{self.value}' for member '{self.name}' is invalid. Values must be tuples."
            )  # pylint: disable=logging-format-interpolation

    def __repr__(self):
        return f"ELFSymbolType.{self.name}: (elf_value: {self.elf_value}, os_proc: {self.os_proc})"

    def __eq__(self, other):
        """Members compare equal when their numeric ELF values match; the os_proc tag is ignored."""
        if type(self) is not type(other):
            return False
        return self.value[0] == other.value[0]

    def __ne__(self, other):
        return not (self == other)

    @property
    def elf_value(self):
        """The numeric ELF symbol type (first tuple element)."""
        return self.value[0]  # pylint: disable=unsubscriptable-object

    @property
    def os_proc(self):
        """The OS/processor tag (second tuple element), or None for generic types."""
        return self.value[1]  # pylint: disable=unsubscriptable-object

    @property
    def is_custom_os_proc(self):
        """True when the value lies in the STT_LOOS..STT_HIPROC range and carries an os_proc tag."""
        if self.elf_value in range(self.STT_LOOS.elf_value, self.STT_HIPROC.elf_value + 1):  # pylint: disable=no-member
            return self.os_proc is not None
        return False

    def to_base_type(self):
        """
        Map this ELF-specific type onto the generic ``SymbolType``.

        :return: The matching ``SymbolType`` member; ``SymbolType.TYPE_OTHER`` when none applies.
        """
        if self is ELFSymbolType.STT_NOTYPE:
            return SymbolType.TYPE_NONE
        elif self in [ELFSymbolType.STT_FUNC, ELFSymbolType.STT_GNU_IFUNC]:
            # STT_GNU_IFUNC is covered here; list membership uses __eq__, i.e. the numeric value.
            return SymbolType.TYPE_FUNCTION
        elif self in [ELFSymbolType.STT_OBJECT, ELFSymbolType.STT_COMMON]:
            return SymbolType.TYPE_OBJECT
        elif self is ELFSymbolType.STT_SECTION:
            return SymbolType.TYPE_SECTION
        elif self is ELFSymbolType.STT_TLS:
            return SymbolType.TYPE_TLS_OBJECT
        else:
            return SymbolType.TYPE_OTHER
def __ELFSymbolTypeArchParser(cls, value):
    """
    Allow default types to be looked up by their bare `int`:
    `ELFSymbolType(10)` rather than `ELFSymbolType((10,None))`.

    Idea courtesy: https://stackoverflow.com/q/24105268/1137728.

    `str` parsing as in the SO link above is not needed since `Enum` already
    has built-in item access: `ELFSymbolType['STT_FUNC']`.
    """
    # A bare integer is shorthand for the generic (no os_proc) variant of that type.
    lookup_key = (value, None) if isinstance(value, int) else value
    return super(ELFSymbolType, cls).__new__(cls, lookup_key)


setattr(ELFSymbolType, "__new__", __ELFSymbolTypeArchParser)

View File

@@ -0,0 +1,150 @@
from typing import Optional, TYPE_CHECKING
from elftools.dwarf.die import DIE
from cle.address_translator import AT
from .variable_type import VariableType
if TYPE_CHECKING:
from .elf import ELF
from .subprogram import LexicalBlock
class Variable:
    """
    Variable for DWARF
    from a DW_TAG_variable or DW_TAG_formal_parameter

    :ivar str name:         The name of the variable
    :ivar relative_addr:    The relative addr (base addr depends on the type)
    :ivar lexical_block:    For a local variable, the lexical block where the variable is declared
    """

    def __init__(self, elf_object: "ELF"):
        self._elf_object = elf_object
        # Everything below is optional and may be filled in after construction.
        self.relative_addr = None
        self.name = None
        self._type_offset = None
        self.decl_line = None
        self.decl_file = None
        self.lexical_block = None
        self.external = False
        self.declaration_only = False

    @staticmethod
    def from_die(die: DIE, expr_parser, elf_object: "ELF", lexical_block: Optional["LexicalBlock"] = None):
        """
        Build a variable from a DIE.

        A single-operation DW_FORM_exprloc location selects the concrete class: DW_OP_addr gives a
        MemoryVariable, DW_OP_fbreg a StackVariable and DW_OP_reg* a RegisterVariable. Anything else
        produces a plain Variable without an address.
        """
        attrs = die.attributes

        # first the address
        var = None
        if "DW_AT_location" in attrs and attrs["DW_AT_location"].form == "DW_FORM_exprloc":
            parsed_exprs = expr_parser.parse_expr(attrs["DW_AT_location"].value)
            if len(parsed_exprs) == 1:
                expr = parsed_exprs[0]
                if expr.op_name == "DW_OP_addr":
                    var = MemoryVariable(elf_object, expr.args[0])
                elif expr.op_name == "DW_OP_fbreg":
                    var = StackVariable(elf_object, expr.args[0])
                elif expr.op_name.startswith("DW_OP_reg"):
                    var = RegisterVariable(elf_object, expr.op - 0x50)  # 0x50 == DW_OP_reg0
        if var is None:
            var = Variable(elf_object)

        if "DW_AT_name" in attrs:
            var.name = attrs["DW_AT_name"].value.decode("utf-8")
        if "DW_AT_type" in attrs:
            # The attribute value is added to the compilation unit's offset.
            var._type_offset = attrs["DW_AT_type"].value + die.cu.cu_offset
        if "DW_AT_decl_line" in attrs:
            var.decl_line = attrs["DW_AT_decl_line"].value
        if "DW_AT_external" in attrs:
            var.external = True
        if "DW_AT_declaration" in attrs:
            var.declaration_only = True

        var.lexical_block = lexical_block
        return var

    # overwritten for stack variables
    def rebased_addr_from_cfa(self, cfa: int):
        """
        The address of this variable in the global memory.

        :param cfa:     The canonical frame address as described by the DWARF standard.
        """
        return self.rebased_addr

    @property
    def rebased_addr(self):
        """A plain Variable has no address; this base implementation returns None."""
        return None

    @property
    def addr(self):
        """
        Please use 'relative_addr' or 'rebased_addr' instead.
        """
        return self.relative_addr

    @property
    def type(self) -> VariableType:
        """The entry of the ELF object's ``type_list`` at this variable's type offset, or None on KeyError."""
        try:
            found = self._elf_object.type_list[self._type_offset]
        except KeyError:
            return None
        return found

    @property
    def sort(self) -> str:
        """Storage kind; subclasses return 'stack', 'register' or 'global', the base returns 'unknown'."""
        # sort = 'stack' | 'register' | 'global'
        return "unknown"
class MemoryVariable(Variable):
    """
    This includes all variables that are not on the stack and not in a register.
    So all global variables, and also local static variables in C!
    """

    def __init__(self, elf_object: "ELF", relative_addr):
        super().__init__(elf_object)
        self.relative_addr = relative_addr

    @property
    def rebased_addr(self):
        """The relative address translated (RVA to MVA) against the owning ELF object."""
        translator = AT.from_rva(self.relative_addr, self._elf_object)
        return translator.to_mva()

    @property
    def sort(self) -> str:
        """Always 'global'."""
        return "global"
class StackVariable(Variable):
    """
    Stack Variable from DWARF.

    ``relative_addr`` holds an offset that is added to the canonical frame
    address to obtain the variable's location.
    """

    def __init__(self, elf_object: "ELF", relative_addr):
        super().__init__(elf_object)
        self.relative_addr = relative_addr

    @property
    def sort(self) -> str:
        return "stack"

    def rebased_addr_from_cfa(self, cfa: int):
        """
        :param cfa: The canonical frame address.
        :return: ``cfa`` plus the stored frame offset.
        """
        frame_offset = self.relative_addr
        return frame_offset + cfa
class RegisterVariable(Variable):
    """
    Register Variable from DWARF.

    The register identifier passed to the constructor is kept in
    ``relative_addr``.
    """

    def __init__(self, elf_object: "ELF", register_addr):
        super().__init__(elf_object)
        # FIXME should this really go into relative addr?
        self.relative_addr = register_addr

    @property
    def sort(self) -> str:
        return "register"

View File

@@ -0,0 +1,294 @@
from elftools.dwarf.die import DIE
class VariableType:
    """
    Entry class for DW_TAG_xxx_type

    :param name:        name of the type
    :param byte_size:   amount of bytes the type take in memory
    :param elf_object:  elf object to reference to (useful for pointer,...)

    :ivar name:         name of the type
    :type name:         str
    :ivar byte_size:    amount of bytes the type take in memory
    """

    # DIE tags for which read_from_die knows a concrete reader class.
    _SUPPORTED_TAGS = (
        "DW_TAG_base_type",
        "DW_TAG_pointer_type",
        "DW_TAG_structure_type",
        "DW_TAG_array_type",
        "DW_TAG_typedef",
        "DW_TAG_union_type",
    )

    def __init__(self, name: str, byte_size: int, elf_object):
        self.name = name
        self.byte_size = byte_size
        self._elf_object = elf_object

    @staticmethod
    def read_from_die(die: DIE, elf_object):
        """
        Entry point for reading a DW_TAG_xxx_type: dispatch on the DIE's tag to
        the matching type class.

        :return: whatever the selected class' ``read_from_die`` returns, or
                 ``None`` for a tag that is not supported
        """
        tag = die.tag
        if tag == "DW_TAG_base_type":
            return BaseType.read_from_die(die, elf_object)
        if tag == "DW_TAG_pointer_type":
            return PointerType.read_from_die(die, elf_object)
        if tag == "DW_TAG_structure_type":
            return StructType.read_from_die(die, elf_object)
        if tag == "DW_TAG_array_type":
            return ArrayType.read_from_die(die, elf_object)
        if tag == "DW_TAG_typedef":
            return TypedefType.read_from_die(die, elf_object)
        if tag == "DW_TAG_union_type":
            return UnionType.read_from_die(die, elf_object)
        return None

    @staticmethod
    def supported_die(die: DIE) -> bool:
        """
        Tell whether ``read_from_die`` has a reader for the tag of ``die``.
        """
        return die.tag in VariableType._SUPPORTED_TAGS
class PointerType(VariableType):
    """
    Entry class for DW_TAG_pointer_type. It is inherited from VariableType

    :param byte_size:           amount of bytes the type take in memory
    :param elf_object:          elf object to reference to (useful for pointer,...)
    :param referenced_offset:   type of the referenced as offset in the compilation_unit
    """

    def __init__(self, byte_size: int, elf_object, referenced_offset: int):
        super().__init__("pointer", byte_size, elf_object)
        self._referenced_offset = referenced_offset

    @classmethod
    def read_from_die(cls, die: DIE, elf_object):
        """
        Read an entry of DW_TAG_pointer_type.

        :return: ``None`` when the DIE has no DW_AT_byte_size attribute. A
                 missing DW_AT_type is accepted and stored as a ``None``
                 referenced offset.
        """
        attrs = die.attributes
        size_attr = attrs.get("DW_AT_byte_size")
        if size_attr is None:
            return None

        type_attr = attrs.get("DW_AT_type")
        # DW_AT_type is stored relative to the compilation unit; add the CU offset.
        target = None if type_attr is None else type_attr.value + die.cu.cu_offset
        return cls(size_attr.value, elf_object, target)

    @property
    def referenced_type(self):
        """
        attribute to get the referenced type. Return None if the type is not loaded
        """
        try:
            return self._elf_object.type_list[self._referenced_offset]
        except KeyError:
            return None
class BaseType(VariableType):
    """
    Entry class for DW_TAG_base_type. It is inherited from VariableType
    """

    # for __init__ see VariableType

    @classmethod
    def read_from_die(cls, die: DIE, elf_object):
        """
        Read an entry of DW_TAG_base_type.

        :return: ``None`` when there is no DW_AT_byte_size attribute; a type
                 named "unknown" when there is no DW_AT_name attribute.
        """
        attrs = die.attributes
        name_attr = attrs.get("DW_AT_name")
        size_attr = attrs.get("DW_AT_byte_size")
        if size_attr is None:
            return None

        if name_attr is None:
            type_name = "unknown"
        else:
            type_name = name_attr.value.decode()
        return cls(type_name, size_attr.value, elf_object)
class StructType(VariableType):
    """
    Entry class for DW_TAG_structure_type. It is inherited from VariableType

    :param name:        name of the type
    :param byte_size:   amount of bytes the type take in memory
    :param elf_object:  elf object to reference to (useful for pointer,...)
    :param members:     list of the struct's members, in DIE order

    :ivar members:      the members given at construction
    """

    def __init__(self, name: str, byte_size: int, elf_object, members):
        super().__init__(name, byte_size, elf_object)
        self.members = members

    @classmethod
    def read_from_die(cls, die: DIE, elf_object):
        """
        Read an entry of DW_TAG_structure_type, collecting every DW_TAG_member child.

        :return: ``None`` when there is no DW_AT_byte_size attribute; a type
                 named "unknown" when there is no DW_AT_name attribute.
        """
        dw_at_name = die.attributes.get("DW_AT_name", None)
        byte_size = die.attributes.get("DW_AT_byte_size", None)

        if byte_size is None:
            return None

        members = [
            StructMember.read_from_die(die_child, elf_object)
            for die_child in die.iter_children()
            if die_child.tag == "DW_TAG_member"
        ]

        return cls(
            dw_at_name.value.decode() if dw_at_name is not None else "unknown", byte_size.value, elf_object, members
        )

    def __getitem__(self, member_name):
        """
        Look a member up by name (the first match wins).

        :raises KeyError: if no member carries that name; the missing name is
                          attached to the exception so the failure is diagnosable.
        """
        for member in self.members:
            if member.name == member_name:
                return member
        raise KeyError(member_name)
class UnionType(StructType):
    """
    Entry class for DW_TAG_union_type.

    A union is read and accessed exactly like a struct, so everything is
    inherited from StructType to make it trivial.
    """
class StructMember:
    """
    Entry class for DW_TAG_member. This is not a type but a named member inside a struct.
    Use the property `type` to get its variable type.

    :param name:        name of the member
    :param addr_offset: address offset of the member in the struct
    :param type_offset: type as offset in the compilation_unit
    :param elf_object:  elf object to reference to (useful for pointer,...)

    :ivar name:         name of the member
    """

    def __init__(self, name: str, addr_offset: int, type_offset, elf_object):
        self.name = name
        self.addr_offset = addr_offset
        self._type_offset = type_offset
        self._elf_object = elf_object

    @classmethod
    def read_from_die(cls, die: DIE, elf_object):
        """
        Read an entry of DW_TAG_member.

        Every attribute is optional: a missing DW_AT_name or DW_AT_type is
        stored as ``None`` and a missing DW_AT_data_member_location as offset 0.
        """
        attrs = die.attributes
        name_attr = attrs.get("DW_AT_name")
        type_attr = attrs.get("DW_AT_type")
        location_attr = attrs.get("DW_AT_data_member_location")

        member_name = name_attr.value.decode() if name_attr is not None else None
        type_offset = type_attr.value + die.cu.cu_offset if type_attr is not None else None

        # From the DWARF5 manual, page 118:
        #   The member entry corresponding to a data member that is defined in a structure,
        #   union or class may have either a DW_AT_data_member_location attribute or a
        #   DW_AT_data_bit_offset attribute. If the beginning of the data member is the
        #   same as the beginning of the containing entity then neither attribute is required.
        # TODO bit_offset
        member_offset = location_attr.value if location_attr is not None else 0

        return cls(member_name, member_offset, type_offset, elf_object)

    @property
    def type(self):
        """
        attribute to get the type of the member. Return None if the type is not loaded
        """
        try:
            return self._elf_object.type_list[self._type_offset]
        except KeyError:
            return None
class ArrayType(VariableType):
    """
    Entry class for DW_TAG_array_type. It is inherited from VariableType

    :param byte_size:       amount of bytes the type take in memory
    :param elf_object:      elf object to reference to (useful for pointer,...)
    :param element_offset:  type of the array elements as offset in the compilation_unit
    """

    def __init__(self, byte_size, elf_object, element_offset):
        super().__init__("array", byte_size, elf_object)
        self._element_offset = element_offset

    @classmethod
    def read_from_die(cls, die: DIE, elf_object):
        """
        Read an entry of DW_TAG_array_type.

        :return: ``None`` when there is no DW_AT_type attribute. A missing
                 DW_AT_byte_size is accepted and stored as ``None``.
        """
        attrs = die.attributes
        size_attr = attrs.get("DW_AT_byte_size")
        type_attr = attrs.get("DW_AT_type")
        if type_attr is None:
            return None

        total_size = None if size_attr is None else size_attr.value
        element_offset = type_attr.value + die.cu.cu_offset
        return cls(total_size, elf_object, element_offset)

    @property
    def element_type(self):
        """
        The type of the array elements. Return None if the type is not loaded
        """
        try:
            return self._elf_object.type_list[self._element_offset]
        except KeyError:
            return None
class TypedefType(VariableType):
    """
    Entry class for DW_TAG_typedef. Inherits from VariableType.

    :param name:            name of the new type
    :param byte_size:       amount of bytes the type take in memory
    :param elf_object:      elf object to reference to (useful for pointer,...)
    :param type_offset:     type as offset in the compilation_unit
    """

    def __init__(self, name: str, byte_size, elf_object, type_offset):
        super().__init__(name, byte_size, elf_object)
        self._type_offset = type_offset

    @classmethod
    def read_from_die(cls, die: DIE, elf_object):
        """
        Read an entry of DW_TAG_typedef.

        Every attribute is optional; a missing DW_AT_name, DW_AT_type or
        DW_AT_byte_size is stored as ``None``.
        """
        attrs = die.attributes
        name_attr = attrs.get("DW_AT_name")
        type_attr = attrs.get("DW_AT_type")
        size_attr = attrs.get("DW_AT_byte_size")

        alias_name = name_attr.value.decode() if name_attr is not None else None
        aliased_offset = type_attr.value + die.cu.cu_offset if type_attr is not None else None
        size = size_attr.value if size_attr is not None else None

        return cls(alias_name, size, elf_object, aliased_offset)

    @property
    def type(self):
        """
        attribute to get the aliased type. Return None if the type is not loaded
        """
        try:
            return self._elf_object.type_list[self._type_offset]
        except KeyError:
            return None

View File

@@ -0,0 +1,294 @@
import logging
from cle.backends import Backend, Symbol, Segment, SymbolType
from cle.backends.relocation import Relocation
from cle.utils import ALIGN_UP
from cle.errors import CLEOperationError, CLEError
from cle.address_translator import AT
l = logging.getLogger(name=__name__)
class ExternSegment(Segment):
    """
    The segment describing an extern object's address range. It is not mapped
    from a file, so every file-offset operation is refused.
    """

    is_readable = True
    is_writable = True
    is_executable = True

    def __init__(self, map_size):
        super().__init__(None, 0, None, map_size)

    def contains_offset(self, offset):
        # There is no backing file, hence no file offset can belong to this segment.
        return False

    def addr_to_offset(self, addr):
        raise CLEOperationError(
            "'offset' operations on the extern object are meaningless as it is not mapped from a file"
        )

    def offset_to_addr(self, offset):
        raise CLEOperationError(
            "'offset' operations on the extern object are meaningless as it is not mapped from a file"
        )
class TOCRelocation(Relocation):
    """
    Relocation whose value is simply the rebased address of the symbol it was
    resolved by.
    """

    @property
    def value(self):
        target = self.resolvedby
        return target.rebased_addr
class ExternObject(Backend):
    """
    A synthetic object that hands out addresses for symbols which have to be
    provided as externs.

    Space is handed out by a bump allocator (``_allocate``). Before the object
    is mapped the allocator grows ``map_size`` / ``tls_data_size`` on demand;
    once it is mapped the size is fixed and requests that do not fit are
    forwarded to a chained ``ExternObject`` (``_next_object``), which is created
    when needed.

    :param loader:      the loader this object belongs to
    :param map_size:    initial size of the address range to provide
    :param tls_size:    initial size of the TLS data area
    """

    def __init__(self, loader, map_size=0, tls_size=0):
        super().__init__("cle##externs", None, loader=loader)
        self._next_object = None
        self._delayed_writes = []
        self.next_addr = 0
        self.map_size = map_size
        self.set_arch(loader.main_object.arch)
        self.provides = "extern-address space"
        self.pic = True
        self._import_symbols = {}
        self._warned_data_import = False

        self.tls_data_size = tls_size
        self.tls_next_addr = 0
        self._tls_mapped = False

    def _finalize_tls(self):
        """
        Reserve the TLS data area inside the regular address range. Must run
        exactly once and before the object is mapped.
        """
        if self._is_mapped or self._tls_mapped:
            raise Exception("programming error")

        if self.tls_data_size != 0:
            self.tls_used = True
            self.tls_data_start = self._allocate(self.tls_data_size, alignment=0x10)
            self.tls_block_size = self.tls_data_size

        self._tls_mapped = True

    def rebase(self, new_base):
        """
        Map the object: finalize TLS, materialize the backing memory with all
        SimData values queued so far, then rebase. A no-op once mapped.
        """
        if self._is_mapped:
            return

        if not self._tls_mapped:
            self._finalize_tls()

        backer = bytearray(self.map_size)
        for simdata in self._delayed_writes:
            value = simdata.value()
            start_addr = simdata.relative_addr
            if simdata.type == SymbolType.TYPE_TLS_OBJECT:
                # NOTE(review): TLS values are placed at relative_addr + tls_data_size, while the
                # TLS area itself was reserved at tls_data_start - confirm this offset is intended.
                start_addr += self.tls_data_size
            backer[start_addr : start_addr + len(value)] = value

        self.memory.add_backer(0, bytes(backer))
        self.segments.append(ExternSegment(self.map_size))
        super().rebase(new_base)

    def make_extern(
        self, name, size=0, alignment=None, thumb=False, sym_type=SymbolType.TYPE_FUNCTION, point_to=None, libname=None
    ) -> Symbol:
        """
        Return the extern symbol called ``name``, creating it on first use.

        :param name:        name of the symbol
        :param size:        requested size; replaced by the SimData's static size when one applies
        :param alignment:   alignment of the allocation, defaults to the architecture's word size
        :param thumb:       or-ed into the allocated address
        :param sym_type:    type of the symbol to create
        :param point_to:    if given, the new symbol is a pointer-sized object pointing at this symbol
        :param libname:     library name used when looking up a registered SimData
        :return:            the symbol; for ppc64 ABIv1 function symbols the TOC symbol is returned
        """
        try:
            return self._symbol_cache[name]
        except KeyError:
            pass

        tls = sym_type == SymbolType.TYPE_TLS_OBJECT
        SymbolCls = Symbol
        if point_to is not None:
            simdata = PointToPrecise
        else:
            simdata = lookup(name, libname)
        if simdata is not None:
            SymbolCls = simdata
            size = simdata.static_size(self)
            if sym_type != simdata.type:
                l.warning("Symbol type mismatch between export request and response for %s. What's going on?", name)

        real_size = max(size, 1)

        if alignment is None:
            alignment = self.arch.bytes

        make_toc = getattr(self.loader.main_object, "is_ppc64_abiv1", False) and sym_type == SymbolType.TYPE_FUNCTION
        toc_symbol = None
        if make_toc:
            # we make two symbols, one for the func and one for the toc
            # the one for the func ends up named with the #func suffix, the toc gets the normal name
            # we return the one for the toc
            toc_symbol = self.make_extern(name, size=0x18, alignment=8, sym_type=SymbolType.TYPE_OBJECT)
            name += "#func"

        if size == 0 and sym_type in (SymbolType.TYPE_NONE, SymbolType.TYPE_OBJECT, SymbolType.TYPE_TLS_OBJECT):
            l.warning(
                "Symbol was allocated without a known size; emulation may fail if it is used non-opaquely: %s", name
            )
            self._warned_data_import = True
            real_size = 8

        local_addr = self._allocate(real_size, alignment=alignment, thumb=thumb, tls=tls)
        if local_addr is None:
            if self._next_object is None:
                # we're at the end of the line. make a new extern object
                # this should only be hit if we're doing this outside a loading pass
                self._make_new_externs(real_size, alignment, tls)
            # Forward every caller-supplied option; dropping thumb/point_to here would silently
            # create a different symbol than the one that was requested.
            return self._next_object.make_extern(
                name,
                size=size,
                alignment=alignment,
                thumb=thumb,
                sym_type=sym_type,
                point_to=point_to,
                libname=libname,
            )

        l.info("Created extern symbol for %s", name)

        new_symbol = SymbolCls(self, name, local_addr, size, sym_type)
        new_symbol.is_export = True
        new_symbol.is_extern = True
        if point_to is not None:
            new_symbol.pointto_name = point_to.name
            new_symbol.pointto_type = point_to.type
            new_symbol.pointto_precise = point_to

        self._symbol_cache[name] = new_symbol
        self.symbols.add(new_symbol)
        self._init_symbol(new_symbol)

        if make_toc:
            # write the pointer to the func into the toc
            # i.e. make a relocation for it
            # then if we're already mapped, apply the relocation manually
            reloc = TOCRelocation(self, toc_symbol, toc_symbol.relative_addr)
            reloc.resolve(new_symbol)
            self.relocs.append(reloc)
            if self._is_mapped:
                reloc.relocate()
            return toc_symbol

        return new_symbol

    def get_pseudo_addr(self, name) -> int:
        """
        Return the rebased address of the extern symbol ``name``, creating the symbol if needed.

        :raises CLEError: if the object is not mapped yet
        """
        if not self._is_mapped:
            raise CLEError("Can't allocate with extern object before it is mapped")

        return self.make_extern(name).rebased_addr

    def allocate(self, size=1, alignment=8, thumb=False, tls=False) -> int:
        """
        Allocate anonymous space in the extern range.

        :return: the rebased address of the allocation, or the offset inside
                 the TLS area when ``tls`` is set
        :raises CLEError: if the object is not mapped yet
        """
        if not self._is_mapped:
            raise CLEError("Can't allocate with extern object before it is mapped")

        result = self._allocate(size=size, alignment=alignment, thumb=thumb, tls=tls)
        if result is not None:
            return result + (0 if tls else self.mapped_base)

        if self._next_object is None:
            # we're at the end of the line. make a new extern object
            # this should only be hit if we're doing this outside a loading pass
            self._make_new_externs(size, alignment, tls)
        # The chained object already returns a rebased address (relative to ITS base);
        # adding our own mapped_base on top would yield a bogus address.
        return self._next_object.allocate(size=size, alignment=alignment, thumb=thumb, tls=tls)

    def _make_new_externs(self, size, alignment, tls):
        """
        Create, finalize and load the chained extern object, sized so that the
        pending request of ``size`` bytes with ``alignment`` fits.
        """
        self._next_object = ExternObject(
            self.loader,
            map_size=max(size + alignment, 0x8000) if not tls else 0x8000,
            tls_size=max(size + alignment, 0x1000) if tls else 0x1000,
        )
        self._next_object._finalize_tls()
        self.loader._internal_load(self._next_object)

    def _allocate(self, size=1, alignment=8, thumb=False, tls=False):
        """
        Bump-allocate ``size`` bytes from the regular or the TLS area.

        :return: the object-relative address (TLS-relative when ``tls``), or
                 ``None`` if the object is mapped and the request does not fit
        """
        if tls:
            start = self.tls_next_addr
            limit = self.tls_data_size
        else:
            start = self.next_addr
            limit = self.map_size

        addr = ALIGN_UP(start, alignment) | thumb
        next_start = addr + size
        if next_start >= limit:
            if self._is_mapped:
                return None
            else:
                # Not mapped yet: the area can still grow to hold the request.
                if tls:
                    self.tls_data_size += next_start - limit
                else:
                    self.map_size += next_start - limit

        if tls:
            self.tls_next_addr = next_start
            return addr
        else:
            self.next_addr = next_start
            return addr

    @property
    def max_addr(self):
        return AT.from_rva(self.map_size - 1, self).to_mva()

    def make_import(self, name, sym_type):
        """
        Create (or fetch) the import symbol ``name`` that a SimData relocation refers to.

        :raises CLEOperationError: if the import already exists with a different type
        """
        # NOTE(review): existence is decided via self.imports while the symbols are kept in
        # self._import_symbols - confirm these two are meant to stay in sync.
        existing = self._import_symbols.get(name) if name in self.imports else None
        if existing is None:
            sym = Symbol(self, name, 0, 0, sym_type)
            sym.is_import = True
            sym.is_extern = True
            # this is kind of tricky... normally if you have an import and an export of the same name in the binary
            # the two symbols are *the same symbol*, usually with a copy relocation. but we don't know ahead of time
            # whether we will have the symbol here in externs, so we will not expose the import symbol to the rest of
            # the world.
            self._import_symbols[name] = sym
            return sym

        if existing.type != sym_type:
            raise CLEOperationError(
                f"Created the same extern import {name} with two different types. Something isn't right!"
            )
        return existing

    def _init_symbol(self, symbol):
        """
        For SimData symbols: register their relocations and write their initial
        bytes - immediately when mapped, otherwise queued for ``rebase``.
        """
        if isinstance(symbol, SimData):
            relocs = symbol.relocations()
            self.relocs.extend(relocs)

            if self._is_mapped:
                # TODO: is this right for tls?
                if symbol.type == SymbolType.TYPE_TLS_OBJECT:
                    self.memory.store(self.tls_block_size, symbol.value())
                else:
                    self.memory.store(symbol.relative_addr, symbol.value())
                for reloc in relocs:
                    reloc.relocate()
            else:
                self._delayed_writes.append(symbol)
class KernelObject(Backend):
    """
    A synthetic, zero-filled object providing "kernel space" addresses that can
    be given names.

    :param loader:      the loader this object belongs to
    :param map_size:    size in bytes of the zero-filled backing memory
    """

    def __init__(self, loader, map_size=0x8000):
        super().__init__("cle##kernel", None, loader=loader)
        self.map_size = map_size
        self.set_arch(loader.main_object.arch)
        zero_backer = bytes(map_size)
        self.memory.add_backer(0, zero_backer)
        self.provides = "kernel space"
        self.pic = True

    def add_name(self, name, addr):
        """
        Register a one-byte function symbol called ``name`` at the mapped address ``addr``.
        """
        relative = AT.from_mva(addr, self).to_rva()
        symbol = Symbol(self, name, relative, 1, SymbolType.TYPE_FUNCTION)
        self._symbol_cache[name] = symbol

    @property
    def max_addr(self):
        last_rva = self.map_size - 1
        return AT.from_rva(last_rva, self).to_mva()
from .simdata import lookup, SimData
from .simdata.common import PointTo, SimDataSimpleRelocation
class PointToPrecise(PointTo):
    """
    A PointTo whose target is an already known symbol object
    (``pointto_precise``) instead of a name to be imported.
    """

    pointto_precise = None

    def relocations(self):
        # The target symbol is known up front, so the relocation is marked as preresolved.
        reloc = SimDataSimpleRelocation(
            self.owner,
            self.pointto_precise,
            self.relative_addr,
            self.addend,
            preresolved=True,
        )
        return [reloc]

View File

@@ -0,0 +1,78 @@
from collections import defaultdict
from typing import List, Type, Optional
from ...relocation import Relocation
from ...symbol import Symbol, SymbolType
# pylint: disable=unused-argument,no-self-use
class SimData(Symbol):
    """
    A SimData class is used to provide data when there is an unresolved data import symbol.

    To use it, subclass this class and implement the below attributes and methods.

    :cvar name:     The name of the symbol to provide
    :cvar libname:  The name of the library from which the symbol originally comes (currently unused).
    :cvar type:     The type of the symbol, usually ``SymbolType.TYPE_OBJECT``.

    Use the below `register` method to register SimData subclasses with CLE.

    NOTE: SimData.type hides the Symbol.type instance property
    """

    name = NotImplemented  # type: str
    type = NotImplemented  # type: SymbolType
    libname = NotImplemented  # type: str

    @classmethod
    def static_size(cls, owner) -> int:
        """
        Implement me: the size in bytes the symbol will occupy, asked for before the symbol
        is constructed.

        :param owner:   The ExternObject owning the symbol-to-be. Useful to get at ``owner.arch``.
        """
        return NotImplemented

    def value(self) -> bytes:
        """
        Implement me: the bytes initially stored in memory for the symbol. The bytestring
        should be as long as what ``static_size`` returned. (owner is ``self.owner`` now)
        """
        return NotImplemented

    def relocations(self) -> List[Relocation]:
        """
        Maybe implement me: the relocation objects to apply for this symbol, none by default.
        To create new import symbols, use ``self.owner.make_import``.
        """
        return []
# Maps a symbol name to every SimData class registered for it, in registration order.
registered_data = defaultdict(list)


def register(simdata_cls: Type[SimData]):
    """
    Register the given SimData class with CLE so it may be used during loading

    Classes without a usable name are ignored: ``None`` as well as
    ``NotImplemented``, the placeholder ``SimData.name`` carries until a
    subclass sets it.
    """
    symbol_name = simdata_cls.name
    if symbol_name is None or symbol_name is NotImplemented:
        return
    registered_data[symbol_name].append(simdata_cls)


def lookup(name: str, libname) -> Optional[Type[SimData]]:
    """
    Find the SimData class registered for the symbol ``name``.

    :param name:    name of the symbol
    :param libname: name of the requesting library, or ``None`` for "any"
    :return:        the first class whose ``libname`` starts with ``libname``;
                    when ``libname`` is ``None``, the last class registered for
                    the name; ``None`` if nothing fits
    """
    weak_option = None
    # .get() instead of indexing: a lookup must not insert an empty entry for
    # every unknown name into the defaultdict.
    for simdata_cls in registered_data.get(name, ()):
        if type(libname) is type(simdata_cls.libname) is str and simdata_cls.libname.startswith(libname):
            return simdata_cls
        elif libname is None:
            # NOTE(review): the original also tested `simdata_cls is None`, which can never hold
            # here; possibly `simdata_cls.libname is None` was intended - confirm.
            weak_option = simdata_cls
    return weak_option
# pylint: disable=unused-import
from . import io_file
from . import glibc_startup

View File

@@ -0,0 +1,106 @@
import struct
from . import SimData
from ...relocation import Relocation
from ...symbol import SymbolType
class StaticData(SimData):
    """
    A simple SimData utility class to use when you have a SimData which should provide just a static
    set of bytes. To use, implement the following:

    :cvar name:     The name of the symbol to provide.
    :cvar libname:  The name of the library from which the symbol originally comes (currently unused).
    :cvar data:     The bytes to provide
    """

    type = SymbolType.TYPE_OBJECT
    data = NotImplemented  # type: bytes

    def value(self):
        # The payload is exactly the class-level byte string.
        return self.data

    @classmethod
    def static_size(cls, owner):
        payload = cls.data
        return len(payload)
class StaticWord(SimData):
    """
    A simple SimData utility class to use when you have a SimData which should provide just a static
    integer. To use, implement the following:

    :cvar name:     The name of the symbol to provide.
    :cvar libname:  The name of the library from which the symbol originally comes (currently unused).
    :cvar word:     The value to provide
    :cvar wordsize: (optional) The size of the value in bytes, default the CPU wordsize
    """

    type = SymbolType.TYPE_OBJECT
    word = NotImplemented  # type: int
    wordsize = None  # type: int

    @classmethod
    def static_size(cls, owner):
        if cls.wordsize is None:
            # No explicit size: fall back to the architecture's word size.
            return owner.arch.bytes
        return cls.wordsize

    def value(self):
        fmt = self.owner.arch.struct_fmt(size=self.wordsize)
        return struct.pack(fmt, self.word)
class PointTo(SimData):
    """
    A simple SimData utility class to use when you have a SimData which should provide just a
    pointer to some other symbol. To use, implement the following:

    :cvar name:         The name of the symbol to provide.
    :cvar libname:      The name of the library from which the symbol originally comes (currently unused).
    :cvar pointto_name: The name of the symbol to point to
    :cvar pointto_type: The type of the symbol to point to (usually ``SymbolType.TYPE_FUNCTION`` or
                        ``SymbolType.TYPE_OBJECT``)
    :cvar addend:       (optional) an integer to be added to the symbol's address before storage
    """

    pointto_name = NotImplemented  # type: str
    pointto_type = NotImplemented  # type: SymbolType
    type = SymbolType.TYPE_OBJECT  # type: SymbolType
    addend = 0  # type: int

    @classmethod
    def static_size(cls, owner):
        # A pointer occupies one machine word.
        return owner.arch.bytes

    def value(self):
        # Zero-filled placeholder; the pointer itself is supplied by the relocation below.
        return bytes(self.size)

    def relocations(self):
        target = self.owner.make_import(self.pointto_name, self.pointto_type)
        reloc = SimDataSimpleRelocation(
            self.owner,
            target,
            self.relative_addr,
            self.addend,
        )
        return [reloc]
class SimDataSimpleRelocation(Relocation):
    """
    A relocation used to implement PointTo. Pretty simple.

    :param owner:       object the relocation belongs to
    :param symbol:      symbol the relocation refers to
    :param addr:        address the relocation applies to
    :param addend:      integer added to the resolved symbol's address
    :param preresolved: when true, ``symbol`` itself is the resolution and no search is done
    """

    def __init__(self, owner, symbol, addr, addend, preresolved=False):
        super().__init__(owner, symbol, addr)
        self.addend = addend
        self.preresolved = preresolved

    def resolve_symbol(self, solist, **kwargs):
        if not self.preresolved:
            super().resolve_symbol(solist, **kwargs)
            return
        self.resolve(self.symbol)

    @property
    def value(self):
        base = self.resolvedby.rebased_addr
        return base + self.addend

View File

@@ -0,0 +1,117 @@
from . import SimData, register
from .common import PointTo, StaticWord
from ...symbol import SymbolType
class DummyProgname(SimData):
    """
    Backing storage for the program name: the NUL-terminated string ``./program``.
    """

    name = "_dummy_progname"
    libname = "libc.so"
    type = SymbolType.TYPE_OBJECT
    progname = b"./program\0"

    def value(self):
        return self.progname

    @classmethod
    def static_size(cls, owner):
        text = cls.progname
        return len(text)
class Progname(PointTo):
    """
    ``__progname``: pointer to ``_dummy_progname`` plus 2, i.e. past the first
    two bytes (``./``) of the string DummyProgname provides.
    """

    name = "__progname"
    libname = "libc.so"
    type = SymbolType.TYPE_OBJECT
    pointto_name = "_dummy_progname"
    pointto_type = SymbolType.TYPE_OBJECT
    addend = 2
class PrognameFull(PointTo):
    """
    ``__progname_full``: pointer to the very start of ``_dummy_progname``.
    """

    name = "__progname_full"
    libname = "libc.so.6"
    type = SymbolType.TYPE_OBJECT
    pointto_name = "_dummy_progname"
    pointto_type = SymbolType.TYPE_OBJECT
    addend = 0
class EnvironmentPointer(StaticWord):
    """``__environ``: a static word initialised to 0."""

    libname = "libc.so"
    name = "__environ"
    word = 0
class EnvironmentPointerAlso(StaticWord):
    """``environ``: a static word initialised to 0, same as ``__environ``."""

    libname = "libc.so"
    name = "environ"
    word = 0
class OptInd(StaticWord):
    """``optind``: a 4-byte static word initialised to 1."""

    libname = "libc.so"
    name = "optind"
    wordsize = 4
    word = 1
class OptArg(StaticWord):
    """``optarg``: a static word initialised to 0."""

    libname = "libc.so"
    name = "optarg"
    word = 0
class Errno(StaticWord):
    """``errno``: a 4-byte thread-local static word initialised to 0."""

    name = "errno"
    libname = "libc.so"
    # Unlike the other static words, this symbol is declared as a TLS object.
    type = SymbolType.TYPE_TLS_OBJECT
    wordsize = 4
    word = 0
class LibcStackEnd(StaticWord):
    """``__libc_stack_end``: a static word initialised to 0."""

    libname = "ld-linux"
    name = "__libc_stack_end"
    word = 0
class RTLDGlobal(SimData):
    """
    ``_rtld_global``: a zero-filled blob whose size depends on the architecture.
    """

    name = "_rtld_global"
    libname = "ld-linux"
    type = SymbolType.TYPE_OBJECT

    @classmethod
    def static_size(cls, owner):
        # Known sizes per architecture name; anything else gets 1024 bytes.
        known_sizes = {"AMD64": 3960, "X86": 2100}
        return known_sizes.get(owner.arch.name, 1024)

    def value(self):
        length = self.static_size(self.owner)
        return bytes(length)
class RTLDGlobalRO(SimData):
    """
    ``_rtld_global_ro``: a zero-filled blob whose size depends on the architecture.
    """

    name = "_rtld_global_ro"
    libname = "ld-linux"
    type = SymbolType.TYPE_OBJECT

    @classmethod
    def static_size(cls, owner):
        # Known sizes per architecture name; anything else gets 256 bytes.
        known_sizes = {"AMD64": 440, "X86": 576}
        return known_sizes.get(owner.arch.name, 256)

    def value(self):
        length = self.static_size(self.owner)
        return bytes(length)
# Make every SimData defined in this module available to the loader, in definition order.
for _simdata_cls in (
    DummyProgname,
    Progname,
    PrognameFull,
    EnvironmentPointer,
    EnvironmentPointerAlso,
    OptInd,
    OptArg,
    Errno,
    LibcStackEnd,
    RTLDGlobal,
    RTLDGlobalRO,
):
    register(_simdata_cls)
del _simdata_cls

View File

@@ -0,0 +1,123 @@
import struct
import logging
from . import SimData, register
from ...symbol import SymbolType
from .common import PointTo
l = logging.getLogger(name=__name__)
#
# Here, we define a specific structure (part of it at least) for the FILE structure.
# These offsets are copied from glibc for maximum compatibility, but we are effectively
# implementing SOME libc with these symbols, so we need SOME implementation of FILE.
#
# this is supposed to be an opaque structure, the internals of which are only cared about
# by an angr simprocedure or whatever implements the fread/fwrite/etc we're linking to. And since we're linking to
# this crap instead of a real stdin/stdout/etc, someone in python land will probably be the guy which needs ABI
# compatibility with us.
#
# however, it is also a desirable property that this is abi-compatible with glibc or something so the someone in python
# land could use this to interface with the "real" structure, which would be filled out by someone other than the
# below code. To this end we so far only have the fileno, but we could add more things like buffers
#
# Per-architecture layout of the FILE structure: its total "size" in bytes and the
# offset "fd" of the file descriptor field.
_IO_FILE = {
    "MIPS32": {"size": 148, "fd": 0x38},
    "X86": {"size": 148, "fd": 0x38},
    "AMD64": {"size": 216, "fd": 0x70},
    # Bionic libc does not use __IO_FILE
    # Refer to http://androidxref.com/5.1.1_r6/xref/bionic/libc/include/stdio.h
    # __sFILE replaces __IO_FILE
    # _file replaces _fileno
    "ARM": {"size": 84, "fd": 0x0E},
    "AARCH64": {"size": 152, "fd": 0x14},
}
# The other ARM flavours share the very same layout entry as "ARM".
_IO_FILE.update(ARMEL=_IO_FILE["ARM"], ARMHF=_IO_FILE["ARM"])


def io_file_data_for_arch(arch):
    """
    Return the FILE layout entry for ``arch``.

    For an architecture without an entry an error is logged and the AMD64
    layout is returned instead.
    """
    layout = _IO_FILE.get(arch.name)
    if layout is not None:
        return layout

    l.error("missing _IO_FILE offsets for arch: %s", arch.name)
    return _IO_FILE["AMD64"]
class IoFilePointer(PointTo):
    """
    Common base of the ``stdin``/``stdout``/``stderr`` pointer symbols; each
    subclass names the object symbol it points to.
    """

    pointto_type = SymbolType.TYPE_OBJECT
    libname = "libc.so.6"
class IoStdinPointer(IoFilePointer):
    """``stdin``: pointer to the ``_io_stdin`` object."""

    pointto_name = "_io_stdin"
    name = "stdin"
class IoStdoutPointer(IoFilePointer):
    """``stdout``: pointer to the ``_io_stdout`` object."""

    pointto_name = "_io_stdout"
    name = "stdout"
class IoStderrPointer(IoFilePointer):
    """``stderr``: pointer to the ``_io_stderr`` object."""

    pointto_name = "_io_stderr"
    name = "stderr"
class IoFile(SimData):
    """
    Base for the FILE objects behind stdin/stdout/stderr. Subclasses set the
    symbol ``name`` and the file descriptor ``fd``.
    """

    libname = "libc.so.6"
    type = SymbolType.TYPE_OBJECT
    fd = NotImplemented  # type: int

    @classmethod
    def static_size(cls, owner):
        layout = io_file_data_for_arch(owner.arch)
        return layout["size"]

    # the canonical version of this should be the FILEBUF_LITERAL macro from glibc
    # for maximum hyperrealism we could have a dependency on the IO_jumps table which would have dependencies on
    # all the functions we could care about which would be implemented by simprocedures
    # but that's way overkill. see above discussion.
    def value(self):
        """
        Build the initial bytes: all zero except the 4-byte file descriptor at
        the architecture's "fd" offset and a 4-byte constant at offset 0.
        """
        buf = bytearray(self.size)
        arch = self.owner.arch
        struct.pack_into(arch.struct_fmt(size=4), buf, io_file_data_for_arch(arch)["fd"], self.fd)
        # NOTE(review): 0xFBAD2088 presumably is the flags/magic word of the structure - confirm.
        struct.pack_into(arch.struct_fmt(size=4), buf, 0, 0xFBAD2088)
        return bytes(buf)
class IoStdin(IoFile):
    """``_io_stdin``: the FILE object with file descriptor 0."""

    fd = 0
    name = "_io_stdin"
class IoStdout(IoFile):
    """``_io_stdout``: the FILE object with file descriptor 1."""

    fd = 1
    name = "_io_stdout"
class IoStderr(IoFile):
    """``_io_stderr``: the FILE object with file descriptor 2."""

    fd = 2
    name = "_io_stderr"
# Make the pointer symbols and the FILE objects they point to available to the loader.
for _simdata_cls in (
    IoStdinPointer,
    IoStdoutPointer,
    IoStderrPointer,
    IoStdin,
    IoStdout,
    IoStderr,
):
    register(_simdata_cls)
del _simdata_cls

View File

@@ -0,0 +1,154 @@
import re
import logging
import binascii
import struct
from . import register_backend, Backend
from ..errors import CLEError
l = logging.getLogger(name=__name__)
__all__ = ("Hex",)
# One Intel HEX record. Groups, in order: byte count (2 hex digits), address (4),
# record type (2), optional data, checksum (2).
intel_hex_re = re.compile(
    b":([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])"
    b"([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F]+)*([0-9a-fA-F][0-9a-fA-F])"
)

# Record types
HEX_TYPE_DATA = 0x00
HEX_TYPE_EOF = 0x01
HEX_TYPE_EXTSEGADDR = 0x02
HEX_TYPE_STARTSEGADDR = 0x03
HEX_TYPE_EXTLINEARADDR = 0x04
HEX_TYPE_STARTLINEARADDR = 0x05

# chh turns one element obtained by iterating a byte string into an integer:
# the identity where bytes and str are different types, ord() otherwise.
if bytes is str:
    chh = ord
else:

    def chh(x):
        return x
class Hex(Backend):
    """
    A loader for Intel Hex Objects
    See https://en.wikipedia.org/wiki/Intel_HEX
    """

    is_default = True  # Tell CLE to automatically consider using the Hex backend

    @staticmethod
    def parse_record(line):
        """
        Parse and validate one Intel HEX record.

        :param line:    one record as a byte string
        :return:        ``(rectype, addr, data)``; ``data`` is the decoded payload, or ``None``
                        when the record carries no data
        :raises CLEError: if the line is not a record, the checksum is wrong or the length
                          field disagrees with the amount of data
        """
        m = intel_hex_re.match(line)
        if not m:
            # `line` is a byte string, so it must be formatted rather than concatenated to a str.
            raise CLEError(f"Invalid HEX record: {line!r}")
        count, addr, rectype, data, cksum = m.groups()
        cksum = int(cksum, 16)
        # The checksum covers every byte of the record except the checksum itself. Work on the
        # matched text so that anything trailing the record cannot disturb the computation.
        try:
            payload = binascii.unhexlify(m.group(0)[1:-2])
        except binascii.Error as err:
            raise CLEError(f"Invalid HEX record: {line!r}") from err
        # Two's complement of the byte sum, modulo 256.
        my_cksum = -sum(payload) % 256
        if my_cksum != cksum:
            raise CLEError(f"Invalid checksum: Computed {hex(my_cksum)}, found {hex(cksum)}")
        count = int(count, 16)
        addr = int(addr, 16)
        rectype = int(rectype, 16)
        if data:
            data = binascii.unhexlify(data)
        if data and count != len(data):
            raise CLEError(f"Data length field does not match length of actual data: {line!r}")
        return rectype, addr, data

    @staticmethod
    def coalesce_regions(regions):
        """
        Merge ``(addr, data)`` regions that directly follow each other into larger ones.

        :return: a new list of regions, sorted by address
        """
        # Lots of tiny memory regions is bad!
        # The greedy algorithm to smash them together:
        result = []
        for addr, region in sorted(regions):
            if result and result[-1][0] + len(result[-1][1]) == addr:
                result[-1] = (result[-1][0], result[-1][1] + region)
            else:
                result.append((addr, region))
        return result

    def __init__(self, *args, **kwargs):
        """
        Read the whole stream, record by record, and populate the memory, the address
        bounds and (if present) the entry point.

        :raises CLEError: if no architecture was specified or a record is invalid/unsupported
        """
        super().__init__(*args, **kwargs)

        if self.arch is None:
            raise CLEError("To use the Hex binary backend, you need to specify an architecture in the loader options.")

        # Do the whole thing in one shot.
        self.os = "unknown"
        got_base = False
        got_entry = False
        self._binary_stream.seek(0)
        string = self._binary_stream.read()
        recs = string.splitlines()
        regions = []
        max_addr = 0
        min_addr = 0xFFFFFFFFFFFFFFFF
        self._base_address = 0
        for rec in recs:
            if not rec.strip():
                # Blank lines carry no record; don't let them abort the load.
                continue
            rectype, addr, data = Hex.parse_record(rec)
            if rectype == HEX_TYPE_DATA:
                if not data:
                    # A data record without payload contributes nothing.
                    continue
                addr += self._base_address
                # l.debug("Loading %d bytes at " % len(data) + hex(addr))
                # Raw data.  Put the bytes
                regions.append((addr, data))
                # We have to be careful about the min and max addrs
                if addr < min_addr:
                    min_addr = addr
                max_addr = max(max_addr, addr + len(data) - 1)
            elif rectype == HEX_TYPE_EOF:
                # EOF
                l.debug("Got EOF record.")
                break
            elif rectype == HEX_TYPE_EXTSEGADDR:
                # "Extended Mode" Segment address, take this value, multiply by 16, make the base
                self._base_address = struct.unpack(">H", data)[0] * 16
                got_base = True
                l.debug("Loading a segment at %#x", self._base_address)
            elif rectype == HEX_TYPE_STARTSEGADDR:
                # Four bytes, the segment and the initial IP
                got_base = True
                got_entry = True
                self._initial_cs, self._initial_ip = struct.unpack(">HH", data)
                # The whole thing is the entry, as far as angr is concerned.
                self._entry = struct.unpack(">I", data)[0]
                l.debug("Got entry point at %#x", self._entry)
            elif rectype == HEX_TYPE_EXTLINEARADDR:
                got_base = True
                # Specifies the base for all future data bytes.
                self._base_address = struct.unpack(">H", data)[0] << 16
                l.debug("Loading a segment at %#x", self._base_address)
            elif rectype == HEX_TYPE_STARTLINEARADDR:
                got_entry = True
                # The 32-bit EIP, really the same as STARTSEGADDR, but some compilers pick one over the other.
                self._entry = struct.unpack(">I", data)[0]
                l.debug("Found entry point at %#x", self._entry)
                self._initial_eip = self._entry
            else:
                raise CLEError("This HEX Object type is not implemented: " + hex(rectype))
        if not got_base:
            l.warning("No base address was found in this HEX object file. It is assumed to be 0")
        if not got_entry:
            l.warning(
                "No entry point was found in this HEX object file, and it is assumed to be 0. "
                "Specify one with `entry_point` to override."
            )
        # HEX specifies a ton of tiny little memory regions.  We now smash them together to make things faster.
        new_regions = Hex.coalesce_regions(regions)
        for addr, data in new_regions:
            self.memory.add_backer(addr, data)
        self._max_addr = max_addr
        self._min_addr = min_addr

    @staticmethod
    def is_compatible(stream):
        """
        Tell whether ``stream`` looks like an Intel HEX file, i.e. starts with a colon.
        The stream position is reset to 0 afterwards.
        """
        stream.seek(0)
        s = stream.read(0x10)
        stream.seek(0)
        return s.startswith(b":")
register_backend("hex", Hex)

Some files were not shown because too many files have changed in this diff Show More