Moved the link work to a Linker class.
Added exception handling in main.
Make the Dynamic section use SStrtab and Elf64_Dyn types instead of its own types.
Move all errors into a dedicated file.
Check for undefined symbols.
Check that input files are relocatable objects and of a supported type.
Bits of source cleaning.
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
+
+# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
+#
+# This file is part of bold, the Byte Optimized Linker.
+#
+# You can redistribute this file and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software
+# Foundation, version 2.1.
+
+from array import array
+import struct
+
+class BinArray(array):
+ """A specialized array that contains bytes"""
+ def __new__(cls, data=None):
+ if data:
+ return array.__new__(BinArray, "B", data)
+ else:
+ return array.__new__(BinArray, "B")
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
+
+# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
+#
+# This file is part of bold, the Byte Optimized Linker.
+#
+# You can redistribute this file and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software
+# Foundation, version 2.1.
+
+"""
+ Empty file in place.
+ Utilities lies in here,
+ Ready to be used.
+"""
\ No newline at end of file
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
+
+# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
+#
+# This file is part of bold, the Byte Optimized Linker.
+# Heavily inspired by elf.h from the GNU C Library.
+#
+# You can redistribute this file and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software
+# Foundation, version 2.1.
+
+"""This file defines standard ELF constants."""
+
+class SymbolicConstant(long):
+ """Allows you to map a symbolic name with a given integer."""
+ _symbolics = {}
+ _default = None
+ def __new__(cls, value, symbolic=None):
+ if symbolic:
+ cls._symbolics[value] = symbolic
+ return long.__new__(cls, value)
+
+ def __str__(self):
+ if long(self) in self._symbolics:
+ return self._symbolics[long(self)]
+ elif self._default:
+ return self._default % long(self)
+ else:
+ return str(long(self))
+
+
+class ElfClass(SymbolicConstant):
+ """File class, as stored in the ei_class byte of the ELF identifier."""
+ # Own table: SymbolicConstant registers names on the class it is called on.
+ _symbolics = {}
+ELFCLASSNONE = ElfClass(0, "Invalid ELF class")
+ELFCLASS32 = ElfClass(1, "ELF32")
+ELFCLASS64 = ElfClass(2, "ELF64")
+
+
+class ElfData(SymbolicConstant):
+ """Data encoding, as stored in the ei_data byte of the ELF identifier."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+ELFDATANONE = ElfData(0, "Invalid data encoding")
+ELFDATA2LSB = ElfData(1, "Little endian")
+ELFDATA2MSB = ElfData(2, "Big endian")
+
+
+class ElfVersion(SymbolicConstant):
+ """ELF version number (used for both ei_version and e_version)."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+EV_NONE = ElfVersion(0, "Invalid ELF version")
+EV_CURRENT = ElfVersion(1, "Current version (1)")
+
+
+class ElfOsAbi(SymbolicConstant):
+ """OS/ABI identification, as stored in the ei_osabi byte."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+# Fill me
+# Both names below are aliases for the value 0 and share one display name.
+ELFOSABI_NONE = ElfOsAbi(0, "UNIX - System V")
+ELFOSABI_SYSV = ElfOsAbi(0, "UNIX - System V")
+
+
+class ElfType(SymbolicConstant):
+ """Object file type, as stored in the e_type field of the ELF header."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+ET_NONE = ElfType(0, "No file type")
+ET_REL = ElfType(1, "Relocatable file")
+ET_EXEC = ElfType(2, "Executable file")
+ET_DYN = ElfType(3, "Shared object file")
+ET_CORE = ElfType(4, "Core file")
+
+
+class ElfMachine(SymbolicConstant):
+ """Target architecture, as stored in the e_machine field of the ELF header."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+# Fill me
+EM_NONE = ElfMachine(0, "No machine")
+EM_386 = ElfMachine(3, "Intel 80386")
+EM_X86_64 = ElfMachine(62, "AMD x86-64 architecture")
+
+class ElfSectionIndex(SymbolicConstant):
+ """Special section indexes that can appear in a symbol's st_shndx."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+SHN_UNDEF = ElfSectionIndex(0, "UND")
+SHN_ABS = ElfSectionIndex(0xfff1, "ABS")
+SHN_COMMON = ElfSectionIndex(0xfff2, "COM")
+
+class ElfShType(SymbolicConstant):
+ """Section type, as stored in the sh_type field of a section header."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+SHT_NULL = ElfShType(0, "NULL")
+SHT_PROGBITS = ElfShType(1, "PROGBITS")
+SHT_SYMTAB = ElfShType(2, "SYMTAB")
+SHT_STRTAB = ElfShType(3, "STRTAB")
+SHT_RELA = ElfShType(4, "RELA")
+SHT_HASH = ElfShType(5, "HASH")
+SHT_DYNAMIC = ElfShType(6, "DYNAMIC")
+SHT_NOTE = ElfShType(7, "NOTE")
+SHT_NOBITS = ElfShType(8, "NOBITS")
+SHT_REL = ElfShType(9, "REL")
+SHT_SHLIB = ElfShType(10, "SHLIB")
+SHT_DYNSYM = ElfShType(11, "DYNSYM")
+
+SHF_WRITE = 0x1
+SHF_ALLOC = 1 << 1
+SHF_EXECINSTR = 1 << 2
+SHF_MERGE = 1 << 4
+SHF_STRINGS = 1 << 5
+SHF_INFO_LINK = 1 << 6
+SHF_LINK_ORDER = 1 << 7
+SHF_OS_NONCONFORMING = 1 << 8
+SHF_GROUP = 1 << 9
+SHF_TLS = 1 << 10
+SHF_MASKOS = 0x0f00000
+SHF_MASKPROC = 0xf000000
+
+STN_UNDEF = 0
+
+
+class ElfSymbolBinding(SymbolicConstant):
+ """Symbol binding, taken from the high nibble of a symbol's st_info."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+STB_LOCAL = ElfSymbolBinding(0, "LOCAL")
+STB_GLOBAL = ElfSymbolBinding(1, "GLOBAL")
+STB_WEAK = ElfSymbolBinding(2, "WEAK")
+
+
+class ElfSymbolType(SymbolicConstant):
+ """Symbol type, taken from the low nibble of a symbol's st_info."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+STT_NOTYPE = ElfSymbolType(0, "NOTYPE")
+STT_OBJECT = ElfSymbolType(1, "OBJECT")
+STT_FUNC = ElfSymbolType(2, "FUNC")
+STT_SECTION = ElfSymbolType(3, "SECTION")
+STT_FILE = ElfSymbolType(4, "FILE")
+STT_COMMON = ElfSymbolType(5, "COMMON")
+STT_TLS = ElfSymbolType(6, "TLS")
+
+
+class ElfSymbolVisibility(SymbolicConstant):
+ """Symbol visibility, taken from the low two bits of a symbol's st_other."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+STV_DEFAULT = ElfSymbolVisibility(0, "DEFAULT")
+STV_INTERNAL = ElfSymbolVisibility(1, "INTERN")
+STV_HIDDEN = ElfSymbolVisibility(2, "HIDDEN")
+STV_PROTECTED = ElfSymbolVisibility(3, "PROTECTED")
+
+
+class ElfPhType(SymbolicConstant):
+ """Segment type, as stored in the p_type field of a program header."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+PT_NULL = ElfPhType(0, "NULL")
+PT_LOAD = ElfPhType(1, "LOAD")
+PT_DYNAMIC = ElfPhType(2, "DYNAMIC")
+PT_INTERP = ElfPhType(3, "INTERP")
+PT_NOTE = ElfPhType(4, "NOTE")
+PT_SHLIB = ElfPhType(5, "SHLIB")
+PT_PHDR = ElfPhType(6, "PHDR")
+PT_TLS = ElfPhType(7, "TLS")
+
+PF_X = (1 << 0)
+PF_W = (1 << 1)
+PF_R = (1 << 2)
+
+class ElfDynamicType(SymbolicConstant):
+ _symbolics = {}
+ _default = "Unknown (0x%x)"
+DT_NULL = ElfDynamicType(0, "NULL")
+DT_NEEDED = ElfDynamicType(1, "NEEDED")
+DT_PLTRELSZ = ElfDynamicType(2, "PLTRELSZ")
+DT_PLTGOT = ElfDynamicType(3, "PLTGOT")
+DT_HASH = ElfDynamicType(4, "HASH")
+DT_STRTAB = ElfDynamicType(5, "STRTAB")
+DT_SYMTAB = ElfDynamicType(6, "SYMTAB")
+DT_RELA = ElfDynamicType(7, "RELA")
+DT_RELASZ = ElfDynamicType(8, "RELASZ")
+DT_RELAENT = ElfDynamicType(9, "RELAENT")
+DT_STRSZ = ElfDynamicType(10, "STRSZ")
+DT_SYMENT = ElfDynamicType(11, "SYMENT")
+DT_INIT = ElfDynamicType(12, "INIT")
+DT_FINI = ElfDynamicType(13, "FINI")
+DT_SONAME = ElfDynamicType(14, "SONAME")
+DT_RPATH = ElfDynamicType(15, "RPATH")
+DT_SYMBOLIC = ElfDynamicType(16, "SYMBOLIC")
+DT_REL = ElfDynamicType(17, "REL")
+DT_RELSZ = ElfDynamicType(18, "RELSZ")
+DT_RELENT = ElfDynamicType(19, "RELENT")
+DT_PLTREL = ElfDynamicType(20, "PLTREL")
+DT_DEBUG = ElfDynamicType(21, "DEBUG")
+DT_TEXTREL = ElfDynamicType(22, "TEXTREL")
+DT_JMPREL = ElfDynamicType(23, "JMPREL")
+DT_BIND_NOW = ElfDynamicType(24, "BIND_NOW")
+DT_INIT_ARRAY = ElfDynamicType(25, "INIT_ARRAY")
+DT_FINI_ARRAY = ElfDynamicType(26, "FINI_ARRAY")
+DT_INIT_ARRAYSZ = ElfDynamicType(27, "INIT_ARRAYSZ")
+DT_FINI_ARRAYSZ = ElfDynamicType(28, "FINI_ARRAYSZ")
+DT_RUNPATH = ElfDynamicType(29, "RUNPATH")
+DT_FLAGS = ElfDynamicType(30, "FLAGS")
+DT_ENCODING = ElfDynamicType(31, "ENCODING")
+DT_PREINIT_ARRAY = ElfDynamicType(32, "PREINIT_ARRAY")
+DT_PREINIT_ARRAYSZ = ElfDynamicType(33, "PREINIT_ARRAYSZ")
+
+# AMD x86-64 relocations
+class Amd64Relocation(SymbolicConstant):
+ """Relocation type, taken from the low 32 bits of a relocation's r_info."""
+ # Own name table, separate from the other constant families.
+ _symbolics = {}
+
+R_X86_64_NONE = Amd64Relocation(0, "NONE")
+R_X86_64_64 = Amd64Relocation(1, "64")
+R_X86_64_PC32 = Amd64Relocation(2, "PC32")
+R_X86_64_GOT32 = Amd64Relocation(3, "GOT32")
+R_X86_64_PLT32 = Amd64Relocation(4, "PLT32")
+R_X86_64_COPY = Amd64Relocation(5, "COPY")
+R_X86_64_GLOB_DAT = Amd64Relocation(6, "GLOB_DAT")
+R_X86_64_JUMP_SLOT = Amd64Relocation(7, "JUMP_SLOT")
+R_X86_64_RELATIVE = Amd64Relocation(8, "RELATIVE")
+R_X86_64_GOTPCREL = Amd64Relocation(9, "GOTPCREL")
+R_X86_64_32 = Amd64Relocation(10, "32")
+R_X86_64_32S = Amd64Relocation(11, "32S")
+R_X86_64_16 = Amd64Relocation(12, "16")
+R_X86_64_PC16 = Amd64Relocation(13, "PC16")
+R_X86_64_8 = Amd64Relocation(14, "8")
+R_X86_64_PC8 = Amd64Relocation(15, "PC8")
+R_X86_64_DTPMOD64 = Amd64Relocation(16, "DTPMOD64")
+R_X86_64_DTPOFF64 = Amd64Relocation(17, "DTPOFF64")
+R_X86_64_TPOFF64 = Amd64Relocation(18, "TPOFF64")
+R_X86_64_TLSGD = Amd64Relocation(19, "TLSGD")
+R_X86_64_TLSLD = Amd64Relocation(20, "TLSLD")
+R_X86_64_DTPOFF32 = Amd64Relocation(21, "DTPOFF32")
+R_X86_64_GOTTPOFF = Amd64Relocation(22, "GOTTPOFF")
+R_X86_64_TPOFF32 = Amd64Relocation(23, "TPOFF32")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
+
+# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
+#
+# This file is part of bold, the Byte Optimized Linker.
+#
+# You can redistribute this file and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software
+# Foundation, version 2.1.
+
+
+from BinArray import BinArray
+from constants import *
+from errors import *
+import struct
+
+# Helpful decorator
+def nested_property(c):
+ return property(**c())
+
+#--------------------------------------------------------------------------
+# Elf
+#--------------------------------------------------------------------------
+
+class Elf64(object):
+ """Handles an Elf64 object."""
+ interpreter = "/lib64/ld-linux-x86-64.so.2"
+
+ def __init__(self, path=None):
+ object.__init__(self)
+ self.header = Elf64_Ehdr()
+ self.header.owner = self
+ self.shdrs = []
+ self.phdrs = []
+ self.shlibs = []
+ self.sections = {}
+ self.segments = []
+ self.local_symbols = {}
+ self.global_symbols = {}
+ self.undefined_symbols = []
+
+ if path:
+ self.fromfile(path)
+
+ # Functions for relocatables files used as input
+
+ def fromfile(self, path):
+ f = file(path, "rb")
+
+ # Load Elf header
+ data = BinArray()
+ data.fromfile(f, Elf64_Ehdr.size)
+ self.header.fromBinArray(data)
+
+ # This linker only supports relocatable objects
+ if self.header.e_type != ET_REL:
+ raise NotRelocatableObject(path)
+
+ if self.header.e_ident.ei_class != ELFCLASS64:
+ raise UnsupportedObject(path, "Not %s" % ELFCLASS64)
+
+ if self.header.e_machine != EM_X86_64:
+ raise UnsupportedObject(path, "Not %s" % EM_X86_64)
+
+ # Load sections headers
+ f.seek(self.header.e_shoff)
+ for i in range(self.header.e_shnum):
+ data = BinArray()
+ data.fromfile(f, self.header.e_shentsize)
+ h = Elf64_Shdr(i, data)
+ h.owner = self
+ self.shdrs.append(h)
+
+ # Read sections content
+ for sh in self.shdrs:
+ data = BinArray()
+ if sh.sh_type != SHT_NOBITS:
+ f.seek(sh.sh_offset)
+ data.fromfile(f, sh.sh_size)
+ sh.content = data
+
+ f.close()
+
+ def resolve_names(self):
+ # The .shstrtab index is in Elf Header. find the sections names
+ strtab = self.shdrs[self.header.e_shstrndx].content
+
+ for sh in self.shdrs:
+ sh.name = strtab[int(sh.sh_name)]
+ self.sections[sh.name] = sh
+
+ # And resolve names in the section itself
+ sh.resolve_names()
+
+
+ def find_symbols(self):
+ for sh in self.shdrs:
+ if sh.sh_type == SHT_SYMTAB:
+ symtab = sh.content.symtab
+
+ for symbol in symtab:
+ if symbol.st_type == STT_FILE:
+ continue
+ if symbol.st_shndx == SHN_ABS:
+ continue
+ if symbol.st_shndx == SHN_UNDEF:
+ if symbol.name:
+ self.undefined_symbols.append(symbol.name)
+ continue
+
+ target_section = self.shdrs[symbol.st_shndx]
+
+ symbol_name = symbol.name
+ value = symbol.st_value
+ bind = symbol.st_binding
+
+ # We got a name, a target section, and an offset in the section
+ if symbol.st_binding == STB_LOCAL:
+ if symbol.st_type == STT_SECTION:
+ symbol_name = target_section.name
+ self.local_symbols[symbol_name] = (target_section, value)
+ else:
+ self.global_symbols[symbol_name] = (target_section, value)
+
+ def apply_relocation(self, all_global_symbols):
+ # find relocation tables
+ relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]]
+ for sh in relocations:
+ target = sh.target.content
+
+ for reloc in sh.content.relatab:
+
+ if reloc.symbol.st_shndx == SHN_UNDEF:
+ # This is an extern symbol, find it in all_global_symbols
+ sym_address = all_global_symbols[reloc.symbol.name]
+ else:
+ # source == in which section it is defined
+ source = self.shdrs[reloc.symbol.st_shndx].content
+ sym_address = source.virt_addr + reloc.symbol.st_value
+
+ target_ba = target.data # The actual BinArray that we'll modify
+ pc_address = target.virt_addr + reloc.r_offset
+
+ if reloc.r_type == R_X86_64_64:
+ format = "<Q" # Direct 64 bit address
+ target_value = sym_address + reloc.r_addend
+ elif reloc.r_type == R_X86_64_PC32:
+ format = "<i" # PC relative 32 bit signed
+ target_value = sym_address + reloc.r_addend - pc_address
+ elif reloc.r_type == R_X86_64_32:
+ format = "<I" # Direct 32 bit zero extended
+ target_value = sym_address + reloc.r_addend
+ elif reloc.r_type == R_X86_64_PC16:
+ format = "<h" # 16 bit sign extended pc relative
+ target_value = sym_address + reloc.r_addend - pc_address
+ elif reloc.r_type == R_X86_64_16:
+ format = "<H" # Direct 16 bit zero extended
+ target_value = sym_address + reloc.r_addend
+ elif reloc.r_type == R_X86_64_PC8:
+ format = "b" # 8 bit sign extended pc relative
+ target_value = sym_address + reloc.r_addend - pc_address
+ elif reloc.r_type == R_X86_64_8:
+ format = "b" # Direct 8 bit sign extended
+ target_value = sym_address + reloc.r_addend
+ else:
+ print "Unsupported relocation type: %s" % reloc.r_type
+ exit(1)
+
+ d = BinArray(struct.pack(format, target_value))
+ start = reloc.r_offset
+ end = start + len(d)
+ target_ba[start:end] = d
+
+
+ # Functions for executables files, as output
+
+ def add_phdr(self, phdr):
+ self.phdrs.append(phdr)
+ self.header.e_phnum = len(self.phdrs)
+ phdr.owner = self
+
+ def add_segment(self, segment):
+ self.segments.append(segment)
+
+ def layout(self, base_vaddr):
+ """Do the actual layout for final executable."""
+
+ virt_addr = base_vaddr
+ file_offset = 0
+ self.virt_addr = base_vaddr
+ self.file_offset = file_offset
+ for s in self.segments:
+ virt_addr += s.align
+ s.virt_addr = virt_addr
+ s.file_offset = file_offset
+ s.layout()
+ virt_addr += s.logical_size
+ file_offset += s.physical_size
+
+ def toBinArray(self):
+ ba = BinArray()
+ for s in self.segments:
+ ba.extend(s.toBinArray())
+ return ba
+
+
+#--------------------------------------------------------------------------
+# Elf file header
+#--------------------------------------------------------------------------
+
+class Elf64_eident(object):
+ """Detailed representation for the Elf identifier."""
+ format = "16B"
+ size = struct.calcsize(format)
+ physical_size = size
+ logical_size = size
+
+ def __init__(self, rawdata=None):
+ object.__init__(self)
+ if rawdata is not None:
+ self.fromBinArray(rawdata)
+
+ def fromBinArray(self, rawdata):
+ t = struct.unpack(self.format, rawdata)
+ self.ei_magic = rawdata[:4]
+ self.ei_class = ElfClass(rawdata[4])
+ self.ei_data = ElfData(rawdata[5])
+ self.ei_version = ElfVersion(rawdata[6])
+ self.ei_osabi = ElfOsAbi(rawdata[7])
+ self.ei_abiversion = 0
+ self.ei_pad = [0, 0, 0, 0, 0, 0, 0]
+
+ def make_default_amd64(self):
+ self.ei_magic = BinArray([0x7f, 0x45, 0x4c, 0x46])
+ self.ei_class = ELFCLASS64
+ self.ei_data = ELFDATA2LSB
+ self.ei_version = EV_CURRENT
+ self.ei_osabi = ELFOSABI_SYSV
+ self.ei_abiversion = 0
+ self.ei_pad = [0, 0, 0, 0, 0, 0, 0]
+
+ def toBinArray(self):
+ ba = BinArray(self.ei_magic)
+ ba.append(self.ei_class)
+ ba.append(self.ei_data)
+ ba.append(self.ei_version)
+ ba.append(self.ei_osabi)
+ ba.append(self.ei_abiversion)
+ ba.extend(self.ei_pad)
+ return ba
+
+
+class Elf64_Ehdr(object):
+ """Elf file header"""
+ format = "<16B 2H I 3Q I 6H"
+ size = struct.calcsize(format)
+ physical_size = size
+ logical_size = size
+
+ def __init__(self, rawdata=None):
+ object.__init__(self)
+ self.e_ident = Elf64_eident()
+ self.e_type = ET_NONE
+ self.e_machine = EM_X86_64
+ self.e_version = EV_CURRENT
+ self.e_entry = 0
+ self.e_phoff = 0
+ self.e_shoff = 0
+ self.e_flags = 0
+ self.e_ehsize = self.size
+ self.e_phentsize = Elf64_Phdr.size
+ self.e_phnum = 0
+ self.e_shentsize = Elf64_Shdr.size
+ self.e_shnum = 0
+ self.e_shstrndx = 0
+ if rawdata is not None:
+ self.fromBinArray(rawdata)
+
+ def fromBinArray(self, rawdata):
+ t = struct.unpack(self.format, rawdata)
+ self.e_ident = Elf64_eident(BinArray(rawdata[:16]))
+ self.e_type = ElfType(t[16])
+ self.e_machine = ElfMachine(t[17])
+ self.e_version = ElfVersion(t[18])
+ self.e_entry = t[19]
+ self.e_phoff = t[20]
+ self.e_shoff = t[21]
+ self.e_flags = t[22]
+ self.e_ehsize = t[23]
+ self.e_phentsize = t[24]
+ self.e_phnum = t[25]
+ self.e_shentsize = t[26]
+ self.e_shnum = t[27]
+ self.e_shstrndx = t[28]
+
+ def toBinArray(self):
+ # Build a list from e_ident and all other fields, to feed struct.pack.
+ values = self.e_ident.toBinArray().tolist()
+ values.extend([self.e_type, self.e_machine, self.e_version, self.e_entry,
+ self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
+ self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx])
+ res = struct.pack(self.format, *values)
+ return BinArray(res)
+
+ def layout(self):
+ pass
+
+
+#--------------------------------------------------------------------------
+# Elf Sections
+#--------------------------------------------------------------------------
+
+class Elf64_Shdr(object):
+ """Elf64 section header."""
+ format = "<2I 4Q 2I 2Q"
+ size = struct.calcsize(format)
+ physical_size = size
+ logical_size = size
+
+ def __init__(self, index=None, rawdata=None):
+ object.__init__(self)
+ self.index = index
+ if rawdata is not None:
+ self.fromBinArray(rawdata)
+
+ def fromBinArray(self, rawdata):
+ t = struct.unpack(self.format, rawdata)
+ self.sh_name = t[0]
+ self.sh_type = ElfShType(t[1])
+ self.sh_flags = t[2]
+ self.sh_addr = t[3]
+ self.sh_offset = t[4]
+ self.sh_size = t[5]
+ self.sh_link = t[6]
+ self.sh_info = t[7]
+ self.sh_addralign = t[8]
+ self.sh_entsize = t[9]
+
+ def resolve_names(self):
+ self.content.resolve_names(self.owner)
+
+ @nested_property
+ def content():
+ def fget(self):
+ return self._content
+ def fset(self, data):
+ """Use the Section factory to get the subclass corresponding to the
+ session type specified in this header)."""
+ self._content = Section(self, data)
+ return locals()
+
+# For sections that contain elements of specific types:
+
+class Elf64_Sym(object):
+ """Symbol Table entry"""
+ format = "<I 2B H 2Q "
+ entsize = struct.calcsize(format)
+ def __init__(self, rawdata=None):
+ object.__init__(self)
+ if rawdata is not None:
+ self.fromBinArray(rawdata)
+
+ @nested_property
+ def st_binding():
+ def fget(self):
+ return ElfSymbolBinding((self.st_info >> 4) & 0x0f)
+ def fset(self, value):
+ self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f))
+ return locals()
+
+ @nested_property
+ def st_type():
+ def fget(self):
+ return ElfSymbolType(self.st_info & 0x0f)
+ def fset(self, value):
+ self.st_info = ((self.st_info & 0xf0) | (value & 0x0f))
+ return locals()
+
+ @nested_property
+ def st_visibility():
+ def fget(self):
+ return ElfSymbolVisibility(self.st_other & 0x03)
+ def fset(self, value):
+ self.st_other = ((self.st_other & 0xfc) | (value & 0x03))
+ return locals()
+
+ def fromBinArray(self, rawdata):
+ t = struct.unpack(self.format, rawdata)
+ self.st_name = t[0] # index in the strtab pointed by sh_link
+ self.st_info = t[1]
+ self.st_other = t[2]
+ self.st_shndx = ElfSectionIndex(t[3])
+ self.st_value = t[4]
+ self.st_size = t[5]
+
+
+class Elf64_Rel(object):
+ format = "<2Q"
+ def __init__(self, rawdata=None):
+ object.__init__(self)
+ self.r_addend = 0 # No addend in a Rel.
+ if rawdata is not None:
+ self.fromBinArray(rawdata)
+
+ def fromBinArray(sef, rawdata):
+ t = struct.unpack(self.format, rawdata)
+ self.r_offset = t[0]
+ self.r_info = t[1]
+
+ @nested_property
+ def r_sym():
+ def fget(self):
+ return (self.r_info >> 32) & 0xffffffff
+ def fset(self, value):
+ self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff)
+ return locals()
+
+ @nested_property
+ def r_type():
+ def fget(self):
+ return Amd64Relocation(self.r_info & 0xffffffff)
+ def fset(self, value):
+ self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff)
+ return locals()
+
+
+class Elf64_Rela(Elf64_Rel):
+ format = "<2Q q"
+ def __init__(self, rawdata=None):
+ Elf64_Rel.__init__(self, rawdata)
+
+ def fromBinArray(self, rawdata):
+ t = struct.unpack(self.format, rawdata)
+ self.r_offset = t[0]
+ self.r_info = t[1]
+ self.r_addend = t[2]
+
+
+class Elf64_Dyn(object):
+ format = "<2Q"
+ size = struct.calcsize(format)
+ def __init__(self, tag, value):
+ object.__init__(self)
+ self.d_tag = tag
+ self.d_val = value
+
+ @nested_property
+ def d_ptr():
+ def fget(self):
+ return self.d_val
+ def fset(self, value):
+ self.d_val = value
+ return locals()
+
+ def toBinArray(self):
+ ba = BinArray()
+ ba.fromstring(struct.pack(self.format, self.d_tag, self.d_val))
+ return ba
+
+# Section types:
+
+def Section(shdr, data=None):
+ """A section factory"""
+ dataclass = {
+ SHT_NULL: SNull,
+ SHT_PROGBITS: SProgBits,
+ SHT_SYMTAB: SSymtab,
+ SHT_STRTAB: SStrtab,
+ SHT_RELA: SRela,
+ SHT_HASH: SHash,
+ SHT_DYNAMIC: SDynamic,
+ SHT_NOTE: SNote,
+ SHT_NOBITS: SNobits,
+ SHT_REL: SRel,
+ SHT_SHLIB: SShlib,
+ SHT_DYNSYM: SDynsym
+ }
+ if shdr.sh_type in dataclass:
+ return dataclass[shdr.sh_type](shdr, data)
+ else:
+ return BaseSection(shdr, data)
+
+
+class BaseSection(object):
+ def __init__(self, shdr, rawdata=None):
+ object.__init__(self)
+ self.data = None
+ self.header = shdr
+ if rawdata is not None:
+ self.fromBinArray(rawdata)
+
+ def fromBinArray(self, rawdata):
+ self.data = rawdata
+
+ def toBinArray(self):
+ if self.data:
+ return self.data
+ else:
+ return BinArray()
+
+ def resolve_names(self, elf):
+ """Nothing to resolve."""
+ pass
+
+ @nested_property
+ def size():
+ def fget(self):
+ return len(self.data)
+ return locals()
+ physical_size = size
+ logical_size = size
+
+ def layout(self):
+ pass
+
+
+class SNull(BaseSection):
+ def __init__(self, shdr, data=None):
+ BaseSection.__init__(self, shdr, None)
+
+
+class SProgBits(BaseSection):
+ def __init__(self, shdr, data=None):
+ BaseSection.__init__(self, shdr, data)
+
+
+class SSymtab(BaseSection):
+ entsize = struct.calcsize(Elf64_Sym.format)
+ def __init__(self, shdr, data=None):
+ self.symtab = []
+ BaseSection.__init__(self, shdr, data)
+
+ def fromBinArray(self, data):
+ BaseSection.fromBinArray(self, data)
+ nument = len(data) / self.entsize
+ for i in range(nument):
+ start = i * self.entsize
+ end = i * self.entsize + self.entsize
+ self.symtab.append(Elf64_Sym(data[start:end]))
+
+ def resolve_names(self, elf):
+ # For a symtab, the strtab is indicated by sh_link
+ strtab = elf.shdrs[self.header.sh_link].content
+ # Resolve for all symbols in the table
+ for sym in self.symtab:
+ sym.name = strtab[sym.st_name]
+
+ def __getitem__(self, key):
+ return self.symtab[key]
+
+
+class SStrtab(BaseSection):
+ """This one behaves in two completely different ways.
+ If it's given a section header and data, it will act as read-only, only to
+ be used for name resolution.
+ If it's not given any argument, it can be used to create a new Strtab."""
+ def __init__(self, shdr=None, data=None):
+ self.readonly = (shdr is not None)
+ self.strtab = {}
+ self.table = []
+ BaseSection.__init__(self, shdr, data)
+ self.virt_addr = None
+
+ def toBinArray(self):
+ if self.readonly:
+ return BaseSection.toBinArray()
+
+ ba = BinArray()
+ keys = self.strtab.keys()
+ keys.sort()
+ for k in keys:
+ ba.fromstring(self.strtab[k] + "\0")
+ return ba
+
+ @nested_property
+ def size():
+ def fget(self):
+ if self.readonly:
+ return len(data)
+ if len(self.strtab) == 0:
+ return 0
+ return sum((len(x)+1 for x in self.strtab.values()))
+ return locals()
+ physical_size = size
+ logical_size = size
+
+ def iteritems(self):
+ return self.strtab.iteritems()
+
+ # Resolution functions
+
+ def fromBinArray(self, data):
+ BaseSection.fromBinArray(self, data)
+ itab = data.tostring().split('\0')
+ i = 0
+ for sname in itab:
+ self.strtab[i] = sname
+ i += len(sname) + 1
+
+ def __getitem__(self, key):
+ if key in self.strtab:
+ return self.strtab[key]
+ else:
+ v = self.data[key:].tostring().split('\0')[0]
+ self.strtab[key] = v
+ return v
+
+ # Executable creation functions
+
+ def append(self, string):
+ if len(self.strtab) == 0:
+ offset = 0
+ else:
+ last = max(self.strtab.keys())
+ offset = last + len(self.strtab[last]) + 1 # for the \0
+ self.strtab[offset] = string
+ return offset
+
+ def layout(self):
+ pass
+
+
+class SRela(BaseSection):
+ entsize = struct.calcsize(Elf64_Rela.format)
+ def __init__(self, shdr, data=None):
+ self.relatab = []
+ BaseSection.__init__(self, shdr, data)
+
+ def fromBinArray(self, data):
+ BaseSection.fromBinArray(self, data)
+ nument = len(data) / self.entsize
+ for i in range(nument):
+ start = i * self.entsize
+ end = i * self.entsize + self.entsize
+ self.relatab.append(Elf64_Rela(data[start:end]))
+
+ def resolve_names(self, elf):
+ """Badly named, this wil resolve to a symtab entry..."""
+ # sh_link leads to the symtab
+ self.symtab = elf.shdrs[self.header.sh_link].content
+ # sh_info links to the section on which the relocation applies
+ self.header.target = elf.shdrs[self.header.sh_info]
+ for r in self.relatab:
+ r.symbol = self.symtab[r.r_sym]
+
+
+
+class SHash(BaseSection):
+ """Content of a SHT_HASH section; kept as raw data, nothing is decoded."""
+ pass
+
+
+class SDynamic(BaseSection):
+ """Content of a SHT_DYNAMIC section; kept as raw data, nothing is decoded."""
+ pass
+
+
+class SNote(BaseSection):
+ """Content of a SHT_NOTE section; kept as raw data, nothing is decoded."""
+ pass
+
+
+class SNobits(BaseSection):
+ size = 0
+ physical_size = 0
+
+ @nested_property
+ def logical_size():
+ def fget(self):
+ return self.header.sh_size
+ return locals()
+
+ def toBinArray(self):
+ return BinArray()
+
+class SRel(BaseSection):
+ """Content of a SHT_REL section; kept as raw data, nothing is decoded."""
+ # NOTE(review): Elf64.apply_relocation reads `relatab` on the content and
+ # `target` on the header of SHT_REL sections too; neither is set up here.
+ pass
+
+
+class SShlib(BaseSection):
+ """Content of a SHT_SHLIB section; kept as raw data, nothing is decoded."""
+ pass
+
+
+class SDynsym(SSymtab):
+ """Content of a SHT_DYNSYM section; handled exactly like a SSymtab."""
+ pass
+
+
+class Elf64_Phdr(object):
+ format = "<2I 6Q"
+ size = struct.calcsize(format)
+ physical_size = size
+ logical_size = size
+
+ def __init__(self):
+ object.__init__(self)
+ self.p_type = PT_NULL
+ self.p_flags = PF_X + PF_W + PF_R
+ self.p_offset = 0
+ self.p_vaddr = 0
+ self.p_paddr = 0
+ self.p_filesz = 0
+ self.p_memsz = 0
+ self.p_align = 1
+
+ def toBinArray(self):
+ res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset,
+ self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align)
+ return BinArray(res)
+
+ def layout(self):
+ pass
+
+ def update_from_content(self, content):
+ """ Update ofset, address and sizes.
+ After having applied layout(),the content knows all these values."""
+ self.p_offset = content.file_offset
+ self.p_vaddr = content.virt_addr
+ self.p_filesz = content.physical_size
+ self.p_memsz = content.logical_size
+
+
+class BaseSegment(object):
+ def __init__(self, align=0):
+ object.__init__(self)
+ self.align = align
+ self.content = []
+
+ def add_content(self, content):
+ self.content.append(content)
+
+ def toBinArray(self):
+ ba = BinArray()
+ for c in self.content:
+ ba.extend(c.toBinArray())
+ return ba
+
+ @nested_property
+ def size():
+ def fget(self):
+ return sum(c.size for c in self.content)
+ return locals()
+ physical_size = size
+ logical_size = size
+
+
+class TextSegment(BaseSegment):
+ def __init__(self, align=0):
+ BaseSegment.__init__(self, align)
+
+ def layout(self):
+ virt_addr = self.virt_addr
+ file_offset = self.file_offset
+ for i in self.content:
+ i.virt_addr = virt_addr
+ i.file_offset = file_offset
+ i.layout()
+ virt_addr += i.logical_size
+ file_offset += i.physical_size
+
+
+class DataSegment(BaseSegment):
+ def __init__(self, align=0):
+ BaseSegment.__init__(self, align)
+ self.nobits = []
+
+ def add_nobits(self, content):
+ self.nobits.append(content)
+
+ def layout(self):
+ virt_addr = self.virt_addr
+ file_offset = self.file_offset
+ for i in self.content:
+ i.virt_addr = virt_addr
+ i.file_offset = file_offset
+ i.layout()
+ virt_addr += i.logical_size
+ file_offset += i.physical_size
+ for i in self.nobits:
+ i.virt_addr = virt_addr
+ i.file_offset = 0
+ i.layout()
+ virt_addr += i.logical_size
+
+ @nested_property
+ def logical_size():
+ def fget(self):
+ return self.physical_size + sum(c.logical_size for c in self.nobits)
+ return locals()
+
+
+class Dynamic(object):
+ def __init__(self):
+ object.__init__(self)
+ self.dyntab = []
+ self.strtab = SStrtab()
+
+ @nested_property
+ def size():
+ def fget(self):
+ # End the table with a DT_NULL without associated value.
+ return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q"))
+ return locals()
+ physical_size = size
+ logical_size = size
+
+ def add_shlib(self, shlib):
+ offset = self.strtab.append(shlib)
+ self.dyntab.append(Elf64_Dyn(DT_NEEDED, offset))
+
+ def add_symtab(self, vaddr):
+ self.dyntab.append(Elf64_Dyn(DT_SYMTAB, vaddr))
+
+ def add_debug(self):
+ self.dyntab.append(Elf64_Dyn(DT_DEBUG, 0))
+
+ def layout(self):
+ # Adjust the address of the strtab, if
+ if self.strtab.virt_addr is None:
+ print "Ooops, strtab's address is not known yet. Aborting."
+ exit(1)
+ else:
+ self.dyntab.append(Elf64_Dyn(DT_STRTAB, self.strtab.virt_addr))
+
+ @nested_property
+ def dt_debug_address():
+ def fget(self):
+ for i, d in enumerate(self.dyntab):
+ if d.d_tag == DT_DEBUG:
+ return self.virt_addr + (i*d.size + (d.size/2))
+ return locals()
+
+
+ def toBinArray(self):
+ ba = BinArray()
+ for d in self.dyntab:
+ ba.extend(d.toBinArray())
+ null = struct.pack("<Q", DT_NULL)
+ ba.fromstring(null)
+ return ba
+
+
+class Interpreter(object):
+ default_interpreter = "/lib64/ld-linux-x86-64.so.2"
+
+ def __init__(self, interpreter=None):
+ object.__init__(self)
+ if interpreter:
+ self.interpreter = interpreter
+ else:
+ self.interpreter = self.default_interpreter
+
+ @nested_property
+ def size():
+ def fget(self):
+ # Null terminated
+ return len(self.interpreter) + 1
+ return locals()
+ physical_size = size
+ logical_size = size
+
+ def toBinArray(self):
+ ba = BinArray(self.interpreter)
+ ba.append(0)
+ return ba
+
+ def layout(self):
+ pass
+
--- /dev/null
+# -*- coding: utf-8 -*-
+
+# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
+
+# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
+#
+# This file is part of bold, the Byte Optimized Linker.
+#
+# You can redistribute this file and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software
+# Foundation, version 2.1.
+
+"""Define all the exceptions."""
+
+class NotRelocatableObject(Exception):
+ """Raised when an input file is not a relocatable ELF object."""
+ def __init__(self, path):
+ self.path = path
+ def __str__(self):
+ return "File '%s' is not a relocatable object file" % self.path
+
+class UnsupportedObject(Exception):
+ """Raised when an input file is not of a supported arch."""
+ def __init__(self, path, reason):
+ self.path = path
+ self.reason = reason
+ def __str__(self):
+ return "File '%s' is not supported: %s" % (self.path, self.reason)
+
+class LibNotFound(Exception):
+ """Raised if a shared library could not be found."""
+ def __init__(self, libname):
+ self.libname = libname
+ def __str__(self):
+ return "Cannot find shared library for '%s'" % self.libname
+
+class UndefinedSymbol(Exception):
+ """Raised if a symbol is referenced but not declared."""
+ def __init__(self, symbol_name):
+ self.symbol = symbol_name
+ def __str__(self):
+ return "Undefined reference to '%s'" % self.symbol
+
+class RedefinedSymbol(Exception):
+ """Raised if a symbol is referenced but not declared."""
+ def __init__(self, symbol_name):
+ self.symbol = symbol_name
+ def __str__(self):
+ return "Symbol '%s' is declared twice" % self.symbol
--- /dev/null
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
+
+# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
+#
+# This file is part of bold, the Byte Optimized Linker.
+#
+# You can redistribute this file and/or modify it under the terms of the
+# GNU Lesser General Public License as published by the Free Software
+# Foundation, version 2.1.
+
+"""
+Main entry point for the bold linker.
+"""
+
+from constants import *
+from elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter
+from errors import *
+from ctypes.util import find_library
+
+class BoldLinker(object):
+  """A Linker object takes one or more objects files, optional shared libs,
+  and arranges all this in an executable.
+
+  Important note: the external functions from the libraries are NOT resolved.
+  This import is left to the user, as it can be done more efficiently by hash.
+  (http://www.linuxdemos.org/contentarticle/how_to_start_4k_introdev_with_ibh)
+  For this, a very useful symbol is exported: _dt_debug, the address of the
+  DT_DEBUG's d_ptr.
+
+  Usage: call add_object() / add_shlib() for every input, optionally change
+  the `entry_point` attribute, call link(), then toBinArray() or tofile().
+  """
+
+  def __init__(self):
+    """Create a linker with no input and "_start" as entry point."""
+    object.__init__(self)
+
+    self.objs = []               # Elf64 objects added by add_object()
+    self.shlibs = []             # names returned by find_library()
+    self.entry_point = "_start"  # symbol whose address becomes e_entry
+    self.output = Elf64()        # the executable being built
+
+  def add_object(self, filename):
+    """Add a relocatable file as input.
+
+    The file is loaded as an Elf64 object, its names and symbols are resolved,
+    and it is queued for link(). Errors raised while loading are not caught
+    here.
+    """
+    obj = Elf64(filename)
+    obj.resolve_names()
+    obj.find_symbols()
+    self.objs.append(obj)
+
+  def add_shlib(self, libname):
+    """Add a shared library to link against.
+
+    Raises LibNotFound when find_library() cannot locate `libname`.
+    """
+    # Note : we use ctypes' find_library to find the real name
+    fullname = find_library(libname)
+    if not fullname:
+      raise LibNotFound(libname)
+    self.shlibs.append(fullname)
+
+  def link(self):
+    """Do the actual linking.
+
+    Builds the text and data segments, lays them out from virtual address
+    0x400000, computes the address of every global symbol, applies the
+    relocations of every input object and sets the entry point in the ELF
+    header.
+
+    Raises:
+      RedefinedSymbol: a global symbol is declared by more than one object.
+      UndefinedSymbol: a referenced symbol, or the entry point, is not
+        declared anywhere.
+    """
+    # Prepare two segments. One for .text, the other for .data + .bss
+    self.text_segment = TextSegment()
+    # .data will be mapped 0x100000 bytes further
+    self.data_segment = DataSegment(align=0x100000)
+    self.output.add_segment(self.text_segment)
+    self.output.add_segment(self.data_segment)
+
+    # Adjust the ELF header
+    self.output.header.e_ident.make_default_amd64()
+    self.output.header.e_phoff = self.output.header.size
+    self.output.header.e_type = ET_EXEC
+    # Elf header lies inside .text
+    self.text_segment.add_content(self.output.header)
+
+    # Create the four Program Headers. They'll be inside .text
+    # The first Program Header defines .text
+    ph_text = Elf64_Phdr()
+    ph_text.p_type = PT_LOAD
+    ph_text.p_align = 0x100000
+    self.output.add_phdr(ph_text)
+    self.text_segment.add_content(ph_text)
+
+    # Second one defines .data + .bss
+    ph_data = Elf64_Phdr()
+    ph_data.p_type = PT_LOAD
+    ph_data.p_align = 0x100000
+    self.output.add_phdr(ph_data)
+    self.text_segment.add_content(ph_data)
+
+    # Third one is only there to define the DYNAMIC section
+    ph_dynamic = Elf64_Phdr()
+    ph_dynamic.p_type = PT_DYNAMIC
+    self.output.add_phdr(ph_dynamic)
+    self.text_segment.add_content(ph_dynamic)
+
+    # Fourth one is for interp
+    ph_interp = Elf64_Phdr()
+    ph_interp.p_type = PT_INTERP
+    self.output.add_phdr(ph_interp)
+    self.text_segment.add_content(ph_interp)
+
+    # We have all the needed program headers, update ELF header.
+    # The header field is named e_phnum, like every other Elf64_Ehdr field.
+    self.output.header.e_phnum = len(self.output.phdrs)
+
+    # Create the actual content for the interpreter section
+    interp = Interpreter()
+    self.text_segment.add_content(interp)
+
+    # Then the Dynamic section
+    dynamic = Dynamic()
+    # for all the requested libs, add a reference in the Dynamic table
+    for lib in self.shlibs:
+      dynamic.add_shlib(lib)
+    # Add an empty symtab, symbol resolution is not done.
+    dynamic.add_symtab(0)
+    # And we need a DT_DEBUG
+    dynamic.add_debug()
+
+    # This belongs to .data
+    self.data_segment.add_content(dynamic)
+    # The dynamic table links to a string table for the libs' names.
+    self.text_segment.add_content(dynamic.strtab)
+
+    # We can now add the interesting sections to the corresponding segments
+    for i in self.objs:
+      for sh in i.shdrs:
+        # Only ALLOC sections are worth it.
+        # This might require change in the future
+        if not (sh.sh_flags & SHF_ALLOC):
+          continue
+
+        if sh.sh_flags & SHF_EXECINSTR:
+          self.text_segment.add_content(sh.content)
+        elif sh.sh_type == SHT_NOBITS:
+          # No exec and no bits in the file: it's for .bss
+          self.data_segment.add_nobits(sh.content)
+        else:
+          # No exec, it's for .data
+          self.data_segment.add_content(sh.content)
+
+    # Now, everything is at its place.
+    # Knowing the base address, we can determine where everyone will fall
+    self.output.layout(base_vaddr=0x400000)
+
+    # Knowing the addresses of all the parts, Program Headers can be filled
+    # This will put the correct p_offset, p_vaddr, p_filesz and p_memsz
+    ph_text.update_from_content(self.text_segment)
+    ph_data.update_from_content(self.data_segment)
+    ph_interp.update_from_content(interp)
+    ph_dynamic.update_from_content(dynamic)
+
+    # Gather the undefined symbols from all input files
+    undefined_symbols = set()
+    for i in self.objs:
+      undefined_symbols.update(i.undefined_symbols)
+
+    # Make a dict with all the symbols declared globally.
+    # Key is the symbol name, value is the final virtual address
+    global_symbols = {}
+
+    for i in self.objs:
+      for s in i.global_symbols:
+        if s in global_symbols:
+          raise RedefinedSymbol(s)
+        # Final address is the section's base address + the symbol's offset
+        addr = i.global_symbols[s][0].content.virt_addr
+        addr += i.global_symbols[s][1]
+        global_symbols[s] = addr
+
+    # Add a few useful symbols
+    global_symbols["_dt_debug"] = dynamic.dt_debug_address
+    global_symbols["_DYNAMIC"] = dynamic.virt_addr
+
+    # Find out which symbols aren't really defined anywhere
+    undefined_symbols.difference_update(global_symbols)
+
+    # For now, it's an error. Later, we could try to find them in the shared
+    # libraries.
+    if undefined_symbols:
+      raise UndefinedSymbol(undefined_symbols.pop())
+
+    # We can now do the actual relocation
+    for i in self.objs:
+      i.apply_relocation(global_symbols)
+
+    # And update the ELF header with the entry point
+    if self.entry_point not in global_symbols:
+      raise UndefinedSymbol(self.entry_point)
+    self.output.header.e_entry = global_symbols[self.entry_point]
+
+    # DONE !
+
+  def toBinArray(self):
+    """Return the output file as produced by the output's toBinArray()."""
+    return self.output.toBinArray()
+
+  def tofile(self, file_object):
+    """Write the output file to `file_object`, an open binary file."""
+    return self.output.toBinArray().tofile(file_object)
+
# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
#
# This file is part of bold, the Byte Optimized Linker.
-# Heavily inspired by elf.h from the GNU C Library.
#
# You can redistribute this file and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software
# Foundation, version 2.1.
-from elf.BinArray import BinArray
-from elf.constants import *
-from elf.elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter
-import struct, sys
-
-infiles = [Elf64(n) for n in sys.argv[1:]]
-for i in infiles:
- i.resolve_names()
- i.find_symbols()
-
-#h = infile.header
-#print "Class: %s" % h.e_ident.ei_class
-#print "Data: %s" % h.e_ident.ei_data
-#print "Version: %s" % h.e_ident.ei_version
-#print "OS/ABI: %s" % h.e_ident.ei_osabi
-#print "ABI Version: %s" % h.e_ident.ei_abiversion
-#print "Type: %s" % h.e_type
-#print "Machine: %s" % h.e_machine
-#print "Version: %s" % h.e_version
-#print "Entry point address: 0x%x" % h.e_entry
-#print "Start of program headers: %i (bytes into file)" % h.e_phoff
-#print "Start of section headers: %i (bytes into file)" % h.e_shoff
-#print "Flags: 0x%x" % h.e_flags
-#print "Size of this header: %i (bytes)" % h.e_ehsize
-#print "Size of program headers: %i (bytes)" % h.e_phentsize
-#print "Number of program headers: %i" % h.e_phnum
-#print "Size of section headers: %i (bytes)" % h.e_shentsize
-#print "Number of section headers: %i" % h.e_shnum
-
-#print "Section header string table index: %s" % h.e_shstrndx
-
-#print
-
-#print "Section Headers:"
-#for sh in infile.shdrs:
- #print "[%2i] %-16s %-16s %016x %08x" % (sh.index, sh.name, sh.sh_type,
- #sh.sh_addr, sh.sh_offset)
- #print " %016x %016x %-5s %4i %4i %4i" % (sh.sh_size, sh.sh_entsize,
- #sh.sh_flags, sh.sh_link, sh.sh_info, sh.sh_addralign)
-#print
-
-#for sh in infile.shdrs :
- #if sh.sh_type == SHT_STRTAB:
- ##print "Section %i is a string table with entries :" % sh.index
- ##for i, name in sh.content.iteritems():
- ## print "%4i %s" % (i, name)
- #print
- #elif sh.sh_type == SHT_SYMTAB:
- #print "Section %i is a symbol table with entries :" % sh.index
- #print " Num: Value Size Type Bind Vis Ndx Name"
- #for i, sym in enumerate(sh.content.symtab):
- #print "%6i: %016x %5s %-7s %-6s %-7s %4s %s" % (i,
- #sym.st_value, sym.st_size, sym.st_type, sym.st_binding,
- #sym.st_visibility, sym.st_shndx, sym.name)
- #print
- #elif sh.sh_type == SHT_RELA:
- #print "Section %s is a RELA that applies to %s:" % (sh.name, sh.target.name)
- #print " Offset Info Type Sym. Value Sym. Name + Addend"
- #for i in sh.content.relatab:
- #print "%012x %012x %-16s %016x %s%s + %x" % (i.r_offset, i.r_info,
- #i.r_type, i.symbol.st_value, i.symbol.name,
- #sh.owner.shdrs[i.symbol.st_shndx].name,
- #i.r_addend)
- #print
-
-
-
-outfile = Elf64()
-
-text_segment = TextSegment()
-data_segment = DataSegment(align=0x100000)
-
-outfile.add_segment(text_segment)
-outfile.add_segment(data_segment)
-
-
-outfile.header.e_ident.make_default_amd64()
-outfile.header.e_phoff = outfile.header.size
-outfile.header.e_type = ET_EXEC
-text_segment.add_content(outfile.header)
-
-ph_text = Elf64_Phdr()
-ph_text.p_type = PT_LOAD
-ph_text.p_align = 0x100000
-outfile.add_phdr(ph_text)
-text_segment.add_content(ph_text)
-
-ph_data = Elf64_Phdr()
-ph_data.p_type = PT_LOAD
-ph_data.p_align = 0x100000
-outfile.add_phdr(ph_data)
-text_segment.add_content(ph_data)
-
-ph_dynamic = Elf64_Phdr()
-ph_dynamic.p_type = PT_DYNAMIC
-outfile.add_phdr(ph_dynamic)
-text_segment.add_content(ph_dynamic)
-
-ph_interp = Elf64_Phdr()
-ph_interp.p_type = PT_INTERP
-outfile.add_phdr(ph_interp)
-text_segment.add_content(ph_interp)
-
-interp = Interpreter()
-text_segment.add_content(interp)
-
-dynamic = Dynamic()
-dynamic.add_shlib("libGL.so.1")
-dynamic.add_shlib("libSDL-1.2.so.0")
-dynamic.add_symtab(0)
-dynamic.add_debug()
-data_segment.add_content(dynamic)
-text_segment.add_content(dynamic.strtab)
-
-
-# Find interresting sections in input file
-for i in infiles:
- for sh in i.shdrs:
- if (sh.sh_flags & SHF_ALLOC):
- if (sh.sh_flags & SHF_EXECINSTR):
- text_segment.add_content(sh.content)
- else: # No exec, it's for .data
- if (sh.sh_type == SHT_NOBITS):
- data_segment.add_nobits(sh.content)
- else:
- data_segment.add_content(sh.content)
-
-
-outfile.layout(base_vaddr=0x400000)
-
-
-# Set addresses, sizes, etc. where known
-outfile.header.e_phnum = len(outfile.phdrs)
-outfile.header.e_phoff = outfile.phdrs[0].file_offset
-
-ph_text.p_offset = text_segment.file_offset
-ph_text.p_vaddr = text_segment.virt_addr
-ph_text.p_filesz = text_segment.physical_size
-ph_text.p_memsz = text_segment.logical_size
-
-ph_data.p_offset = data_segment.file_offset
-ph_data.p_vaddr = data_segment.virt_addr
-ph_data.p_filesz = data_segment.physical_size
-ph_data.p_memsz = data_segment.logical_size
-
-ph_interp.p_offset = interp.file_offset
-ph_interp.p_vaddr = interp.virt_addr
-ph_interp.p_filesz = interp.physical_size
-ph_interp.p_memsz = interp.logical_size
-
-ph_dynamic.p_offset = dynamic.file_offset
-ph_dynamic.p_vaddr = dynamic.virt_addr
-ph_dynamic.p_filesz = dynamic.physical_size
-ph_dynamic.p_memsz = dynamic.logical_size
-
-for i in infiles:
- outfile.undefined_symbols.extend(i.undefined_symbols)
-
-dt_dbg = dynamic.dt_debug_address
-outfile.global_symbols["_dt_debug"] = dt_dbg
-outfile.global_symbols["_DYNAMIC"] = dynamic.virt_addr
-
-# Take all globally declared symbols, and put them in outfile's dict
-for i in infiles:
- for s in i.global_symbols:
- section_addr = i.global_symbols[s][0].content.virt_addr
- addr = section_addr + i.global_symbols[s][1]
- if s in outfile.global_symbols:
- print "Symbol '%s' defined more than once."
- exit(1)
- outfile.global_symbols[s] = addr
-
-for i in infiles:
- i.apply_relocation(outfile.global_symbols)
-
-_start = outfile.global_symbols["_start"]
-outfile.header.e_entry = _start
-
-# outfile.apply_global_relocation()
-
-f = open("prout", "wb")
-outfile.toBinArray().tofile(f)
-f.close()
-
-
+#from bold.constants import *
+#from bold.elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter
+
+__author__ = "Amand Tihon <amand.tihon@alrj.org>"
+__version__ = "0.0.1"
+
+
+from Bold.linker import BoldLinker
+from Bold.errors import *
+from optparse import OptionParser
+import os, sys
+
+class BoldOptionParser(OptionParser):
+  """Command line parser for bold.
+
+  Declares -e/--entry, -l/--library (may be repeated) and -o/--output, in
+  addition to the help and version options handled by optparse.
+  """
+  _usage_message = "%prog [options] file..."
+  _version_message = "%%prog version %s" % __version__
+  _description_message = """A limited ELF linker for x86_64. It is
+intended to create very small executables with the least possible overhead."""
+
+  def __init__(self):
+    OptionParser.__init__(
+      self,
+      usage=self._usage_message,
+      version=self._version_message,
+      description=self._description_message,
+      add_help_option=True,
+      prog="bold")
+
+    # Values used when the corresponding option is absent. There is no
+    # default for "shlibs": it stays None unless -l is given.
+    self.set_defaults(entry="_start", outfile="a.out")
+
+    self.add_option(
+      "-e", "--entry", action="store", dest="entry", metavar="SYMBOL",
+      help="Set the entry point (default: _start)")
+    self.add_option(
+      "-l", "--library", action="append", dest="shlibs", metavar="LIBNAME",
+      help="Search for library LIBNAME")
+    self.add_option(
+      "-o", "--output", action="store", dest="outfile", metavar="FILE",
+      help="Set output file name (default: a.out)")
+
+
+def main():
+  """Parse the command line, link the input files and write the executable.
+
+  Returns the process exit status: 0 on success, 1 once an error message has
+  been printed on stderr.
+  """
+  parser = BoldOptionParser()
+  options, args = parser.parse_args()
+
+  linker = BoldLinker()
+
+  # options.shlibs is None when no -l option was given.
+  for shlib in options.shlibs or []:
+    try:
+      linker.add_shlib(shlib)
+    except LibNotFound, e:
+      print >>sys.stderr, e
+      return 1
+
+  if not args:
+    print >>sys.stderr, "No input files"
+    return 1
+
+  for infile in args:
+    try:
+      linker.add_object(infile)
+    except (NotRelocatableObject, UnsupportedObject, IOError), e:
+      # NotRelocatableObject is part of the errors module as well; report it
+      # like the other input file problems instead of letting it escape.
+      print >>sys.stderr, e
+      return 1
+
+  linker.entry_point = options.entry
+
+  try:
+    linker.link()
+  except (UndefinedSymbol, RedefinedSymbol), e:
+    print >>sys.stderr, e
+    return 1
+
+  # Remove the file if it was present. This is best effort: a missing file is
+  # the usual failure, and any other unlink error is ignored as well; open()
+  # below still reports a path that cannot be written.
+  try:
+    os.unlink(options.outfile)
+  except os.error:
+    pass
+
+  try:
+    o = open(options.outfile, "wb")
+  except IOError, e:
+    print >>sys.stderr, e
+    return 1
+
+  # Close the file even when writing fails.
+  try:
+    linker.tofile(o)
+  finally:
+    o.close()
+
+  try:
+    os.chmod(options.outfile, 0755)
+  except (IOError, OSError), e:
+    # os.chmod reports failures with OSError, which is not an IOError.
+    print >>sys.stderr, e
+    return 1
+
+  return 0
+
+
+if __name__ == "__main__":
+  # Errors that main() does not handle itself are left to propagate, so that
+  # the interpreter prints the full traceback.
+  sys.exit(main())
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
-
-# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
-#
-# This file is part of bold, the Byte Optimized Linker.
-# Heavily inspired by elf.h from the GNU C Library.
-#
-# You can redistribute this file and/or modify it under the terms of the
-# GNU Lesser General Public License as published by the Free Software
-# Foundation, version 2.1.
-
-from array import array
-import struct
-
-class BinArray(array):
- """A specialized array that contains bytes"""
- def __new__(cls, data=None):
- if data:
- return array.__new__(BinArray, "B", data)
- else:
- return array.__new__(BinArray, "B")
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
-
-# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
-#
-# This file is part of bold, the Byte Optimized Linker.
-# Heavily inspired by elf.h from the GNU C Library.
-#
-# You can redistribute this file and/or modify it under the terms of the
-# GNU Lesser General Public License as published by the Free Software
-# Foundation, version 2.1.
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
-
-# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
-#
-# This file is part of bold, the Byte Optimized Linker.
-# Heavily inspired by elf.h from the GNU C Library.
-#
-# You can redistribute this file and/or modify it under the terms of the
-# GNU Lesser General Public License as published by the Free Software
-# Foundation, version 2.1.
-
-"""This file defines standard ELF constants."""
-
-class SymbolicConstant(long):
- """Allows you to map a symbolic name with a given integer."""
- _symbolics = {}
- _default = None
- def __new__(cls, value, symbolic=None):
- if symbolic:
- cls._symbolics[value] = symbolic
- return long.__new__(cls, value)
-
- def __str__(self):
- if long(self) in self._symbolics:
- return self._symbolics[long(self)]
- elif self._default:
- return self._default % long(self)
- else:
- return str(long(self))
-
-
-class ElfClass(SymbolicConstant):
- _symbolics = {}
-ELFCLASSNONE = ElfClass(0, "Invalid ELF class")
-ELFCLASS32 = ElfClass(1, "ELF32")
-ELFCLASS64 = ElfClass(2, "ELF64")
-
-
-class ElfData(SymbolicConstant):
- _symbolics = {}
-ELFDATANONE = ElfData(0, "Invalid data encoding")
-ELFDATA2LSB = ElfData(1, "Little endian")
-ELFDATA2MSB = ElfData(2, "Big endian")
-
-
-class ElfVersion(SymbolicConstant):
- _symbolics = {}
-EV_NONE = ElfVersion(0, "Invalid ELF version")
-EV_CURRENT = ElfVersion(1, "Current version (1)")
-
-
-class ElfOsAbi(SymbolicConstant):
- _symbolics = {}
-# Fill me
-ELFOSABI_NONE = ElfOsAbi(0, "UNIX - System V")
-ELFOSABI_SYSV = ElfOsAbi(0, "UNIX - System V")
-
-
-class ElfType(SymbolicConstant):
- _symbolics = {}
-ET_NONE = ElfType(0, "No file type")
-ET_REL = ElfType(1, "Relocatable file")
-ET_EXEC = ElfType(2, "Executable file")
-ET_DYN = ElfType(3, "Shared object file")
-ET_CORE = ElfType(4, "Core file")
-
-
-class ElfMachine(SymbolicConstant):
- _symbolics = {}
-# Fill me
-EM_NONE = ElfMachine(0, "No machine")
-EM_386 = ElfMachine(3, "Intel 80386")
-EM_X86_64 = ElfMachine(62, "AMD x86-64 architecture")
-
-class ElfSectionIndex(SymbolicConstant):
- _symbolics = {}
-SHN_UNDEF = ElfSectionIndex(0, "UND")
-SHN_ABS = ElfSectionIndex(0xfff1, "ABS")
-SHN_COMMON = ElfSectionIndex(0xfff2, "COM")
-
-class ElfShType(SymbolicConstant):
- _symbolics = {}
-SHT_NULL = ElfShType(0, "NULL")
-SHT_PROGBITS = ElfShType(1, "PROGBITS")
-SHT_SYMTAB = ElfShType(2, "SYMTAB")
-SHT_STRTAB = ElfShType(3, "STRTAB")
-SHT_RELA = ElfShType(4, "RELA")
-SHT_HASH = ElfShType(5, "HASH")
-SHT_DYNAMIC = ElfShType(6, "DYNAMIC")
-SHT_NOTE = ElfShType(7, "NOTE")
-SHT_NOBITS = ElfShType(8, "NOBITS")
-SHT_REL = ElfShType(9, "REL")
-SHT_SHLIB = ElfShType(10, "SHLIB")
-SHT_DYNSYM = ElfShType(11, "DYNSYM")
-
-SHF_WRITE = 0x1
-SHF_ALLOC = 1 << 1
-SHF_EXECINSTR = 1 << 2
-SHF_MERGE = 1 << 4
-SHF_STRINGS = 1 << 5
-SHF_INFO_LINK = 1 << 6
-SHF_LINK_ORDER = 1 << 7
-SHF_OS_NONCONFORMING = 1 << 8
-SHF_GROUP = 1 << 9
-SHF_TLS = 1 << 10
-SHF_MASKOS = 0x0f00000
-SHF_MASKPROC = 0xf000000
-
-STN_UNDEF = 0
-
-
-class ElfSymbolBinding(SymbolicConstant):
- _symbolics = {}
-STB_LOCAL = ElfSymbolBinding(0, "LOCAL")
-STB_GLOBAL = ElfSymbolBinding(1, "GLOBAL")
-STB_WEAK = ElfSymbolBinding(2, "WEAK")
-
-
-class ElfSymbolType(SymbolicConstant):
- _symbolics = {}
-STT_NOTYPE = ElfSymbolType(0, "NOTYPE")
-STT_OBJECT = ElfSymbolType(1, "OBJECT")
-STT_FUNC = ElfSymbolType(2, "FUNC")
-STT_SECTION = ElfSymbolType(3, "SECTION")
-STT_FILE = ElfSymbolType(4, "FILE")
-STT_COMMON = ElfSymbolType(5, "COMMON")
-STT_TLS = ElfSymbolType(6, "TLS")
-
-
-class ElfSymbolVisibility(SymbolicConstant):
- _symbolics = {}
-STV_DEFAULT = ElfSymbolVisibility(0, "DEFAULT")
-STV_INTERNAL = ElfSymbolVisibility(1, "INTERN")
-STV_HIDDEN = ElfSymbolVisibility(2, "HIDDEN")
-STV_PROTECTED = ElfSymbolVisibility(3, "PROTECTED")
-
-
-class ElfPhType(SymbolicConstant):
- _symbolics = {}
-PT_NULL = ElfPhType(0, "NULL")
-PT_LOAD = ElfPhType(1, "LOAD")
-PT_DYNAMIC = ElfPhType(2, "DYNAMIC")
-PT_INTERP = ElfPhType(3, "INTERP")
-PT_NOTE = ElfPhType(4, "NOTE")
-PT_SHLIB = ElfPhType(5, "SHLIB")
-PT_PHDR = ElfPhType(6, "PHDR")
-PT_TLS = ElfPhType(7, "TLS")
-
-PF_X = (1 << 0)
-PF_W = (1 << 1)
-PF_R = (1 << 2)
-
-class ElfDynamicType(SymbolicConstant):
- _symbolics = {}
- _default = "Unknown (0x%x)"
-DT_NULL = ElfDynamicType(0, "NULL")
-DT_NEEDED = ElfDynamicType(1, "NEEDED")
-DT_PLTRELSZ = ElfDynamicType(2, "PLTRELSZ")
-DT_PLTGOT = ElfDynamicType(3, "PLTGOT")
-DT_HASH = ElfDynamicType(4, "HASH")
-DT_STRTAB = ElfDynamicType(5, "STRTAB")
-DT_SYMTAB = ElfDynamicType(6, "SYMTAB")
-DT_RELA = ElfDynamicType(7, "RELA")
-DT_RELASZ = ElfDynamicType(8, "RELASZ")
-DT_RELAENT = ElfDynamicType(9, "RELAENT")
-DT_STRSZ = ElfDynamicType(10, "STRSZ")
-DT_SYMENT = ElfDynamicType(11, "SYMENT")
-DT_INIT = ElfDynamicType(12, "INIT")
-DT_FINI = ElfDynamicType(13, "FINI")
-DT_SONAME = ElfDynamicType(14, "SONAME")
-DT_RPATH = ElfDynamicType(15, "RPATH")
-DT_SYMBOLIC = ElfDynamicType(16, "SYMBOLIC")
-DT_REL = ElfDynamicType(17, "REL")
-DT_RELSZ = ElfDynamicType(18, "RELSZ")
-DT_RELENT = ElfDynamicType(19, "RELENT")
-DT_PLTREL = ElfDynamicType(20, "PLTREL")
-DT_DEBUG = ElfDynamicType(21, "DEBUG")
-DT_TEXTREL = ElfDynamicType(22, "TEXTREL")
-DT_JMPREL = ElfDynamicType(23, "JMPREL")
-DT_BIND_NOW = ElfDynamicType(24, "BIND_NOW")
-DT_INIT_ARRAY = ElfDynamicType(25, "INIT_ARRAY")
-DT_FINI_ARRAY = ElfDynamicType(26, "FINI_ARRAY")
-DT_INIT_ARRAYSZ = ElfDynamicType(27, "INIT_ARRAYSZ")
-DT_FINI_ARRAYSZ = ElfDynamicType(28, "FINI_ARRAYSZ")
-DT_RUNPATH = ElfDynamicType(29, "RUNPATH")
-DT_FLAGS = ElfDynamicType(30, "FLAGS")
-DT_ENCODING = ElfDynamicType(31, "ENCODING")
-DT_PREINIT_ARRAY = ElfDynamicType(32, "PREINIT_ARRAY")
-DT_PREINIT_ARRAYSZ = ElfDynamicType(33, "PREINIT_ARRAYSZ")
-
-# AMD x86-64 relocations
-class Amd64Relocation(SymbolicConstant):
- _symbolics = {}
-
-R_X86_64_NONE = Amd64Relocation(0, "NONE")
-R_X86_64_64 = Amd64Relocation(1, "64")
-R_X86_64_PC32 = Amd64Relocation(2, "PC32")
-R_X86_64_GOT32 = Amd64Relocation(3, "GOT32")
-R_X86_64_PLT32 = Amd64Relocation(4, "PLT32")
-R_X86_64_COPY = Amd64Relocation(5, "COPY")
-R_X86_64_GLOB_DAT = Amd64Relocation(6, "GLOB_DAT")
-R_X86_64_JUMP_SLOT = Amd64Relocation(7, "JUMP_SLOT")
-R_X86_64_RELATIVE = Amd64Relocation(8, "RELATIVE")
-R_X86_64_GOTPCREL = Amd64Relocation(9, "GOTPCREL")
-R_X86_64_32 = Amd64Relocation(10, "32")
-R_X86_64_32S = Amd64Relocation(11, "32S")
-R_X86_64_16 = Amd64Relocation(12, "16")
-R_X86_64_PC16 = Amd64Relocation(13, "PC16")
-R_X86_64_8 = Amd64Relocation(14, "8")
-R_X86_64_PC8 = Amd64Relocation(15, "PC8")
-R_X86_64_DTPMOD64 = Amd64Relocation(16, "DTPMOD64")
-R_X86_64_DTPOFF64 = Amd64Relocation(17, "DTPOFF64")
-R_X86_64_TPOFF64 = Amd64Relocation(18, "TPOFF64")
-R_X86_64_TLSGD = Amd64Relocation(19, "TLSGD")
-R_X86_64_TLSLD = Amd64Relocation(20, "TLSLD")
-R_X86_64_DTPOFF32 = Amd64Relocation(21, "DTPOFF32")
-R_X86_64_GOTTPOFF = Amd64Relocation(22, "GOTTPOFF")
-R_X86_64_TPOFF32 = Amd64Relocation(23, "TPOFF32")
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
-
-# Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
-#
-# This file is part of bold, the Byte Optimized Linker.
-# Heavily inspired by elf.h from the GNU C Library.
-#
-# You can redistribute this file and/or modify it under the terms of the
-# GNU Lesser General Public License as published by the Free Software
-# Foundation, version 2.1.
-
-
-from BinArray import BinArray
-from constants import *
-import struct
-
-# Helpful decorator
-def nested_property(c):
- return property(**c())
-
-#--------------------------------------------------------------------------
-# Elf
-#--------------------------------------------------------------------------
-
-class Elf64(object):
- """Handles an Elf64 object."""
- interpreter = "/lib64/ld-linux-x86-64.so.2"
-
- def __init__(self, path=None):
- object.__init__(self)
- self.header = Elf64_Ehdr()
- self.header.owner = self
- self.shdrs = []
- self.phdrs = []
- self.shlibs = []
- self.sections = {}
- self.segments = []
- self.local_symbols = {}
- self.global_symbols = {}
- self.undefined_symbols = []
-
- if path:
- self.fromfile(path)
-
- def fromfile(self, path):
- f = file(path, "rb")
-
- # Load Elf header
- data = BinArray()
- data.fromfile(f, Elf64_Ehdr.size)
- self.header.fromBinArray(data)
-
- # Load sections headers
- f.seek(self.header.e_shoff)
- for i in range(self.header.e_shnum):
- data = BinArray()
- data.fromfile(f, self.header.e_shentsize)
- h = Elf64_Shdr(i, data)
- h.owner = self
- self.shdrs.append(h)
-
- # Read sections content
- for sh in self.shdrs:
- data = BinArray()
- if sh.sh_type != SHT_NOBITS:
- f.seek(sh.sh_offset)
- data.fromfile(f, sh.sh_size)
- sh.content = data
-
- f.close()
-
- def resolve_names(self):
- # The .shstrtab index is in Elf Header. find the sections names
- strtab = self.shdrs[self.header.e_shstrndx].content
-
- for sh in self.shdrs:
- sh.name = strtab[int(sh.sh_name)]
- self.sections[sh.name] = sh
-
- # And resolve names in the section itself
- sh.resolve_names()
-
-
- def find_symbols(self):
- for sh in self.shdrs:
- if sh.sh_type == SHT_SYMTAB:
- symtab = sh.content.symtab
-
- for symbol in symtab:
- if symbol.st_type == STT_FILE:
- continue
- if symbol.st_shndx == SHN_ABS:
- continue
- if symbol.st_shndx == SHN_UNDEF:
- if symbol.name:
- self.undefined_symbols.append(symbol.name)
- continue
-
- target_section = self.shdrs[symbol.st_shndx]
-
- symbol_name = symbol.name
- value = symbol.st_value
- bind = symbol.st_binding
-
- # We got a name, a target section, and an offset in the section
- if symbol.st_binding == STB_LOCAL:
- if symbol.st_type == STT_SECTION:
- symbol_name = target_section.name
- self.local_symbols[symbol_name] = (target_section, value)
- else:
- self.global_symbols[symbol_name] = (target_section, value)
-
- def apply_relocation(self, all_global_symbols):
- # find relocation tables
- relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]]
- for sh in relocations:
- target = sh.target.content
-
- for reloc in sh.content.relatab:
-
- if reloc.symbol.st_shndx == SHN_UNDEF:
- # This is an extern symbol, find it in all_global_symbols
- sym_address = all_global_symbols[reloc.symbol.name]
- print "0x%x" % sym_address
- else:
- # source == in which section it is defined
- source = self.shdrs[reloc.symbol.st_shndx].content
- sym_address = source.virt_addr + reloc.symbol.st_value
-
- target_ba = target.data # The actual BinArray that we'll modify
- pc_address = target.virt_addr + reloc.r_offset
-
- if reloc.r_type == R_X86_64_64:
- format = "<Q" # Direct 64 bit address
- target_value = sym_address + reloc.r_addend
- elif reloc.r_type == R_X86_64_PC32:
- format = "<i" # PC relative 32 bit signed
- target_value = sym_address + reloc.r_addend - pc_address
- elif reloc.r_type == R_X86_64_32:
- format = "<I" # Direct 32 bit zero extended
- target_value = sym_address + reloc.r_addend
- elif reloc.r_type == R_X86_64_PC16:
- format = "<h" # 16 bit sign extended pc relative
- target_value = sym_address + reloc.r_addend - pc_address
- elif reloc.r_type == R_X86_64_16:
- format = "<H" # Direct 16 bit zero extended
- target_value = sym_address + reloc.r_addend
- elif reloc.r_type == R_X86_64_PC8:
- format = "b" # 8 bit sign extended pc relative
- target_value = sym_address + reloc.r_addend - pc_address
- elif reloc.r_type == R_X86_64_8:
- format = "b" # Direct 8 bit sign extended
- target_value = sym_address + reloc.r_addend
- else:
- print "Unsupported relocation type: %s" % reloc.r_type
- exit(1)
-
- d = BinArray(struct.pack(format, target_value))
- start = reloc.r_offset
- end = start + len(d)
- target_ba[start:end] = d
-
-
- def add_phdr(self, phdr):
- self.phdrs.append(phdr)
- self.header.e_phnum = len(self.phdrs)
- phdr.owner = self
-
- def add_segment(self, segment):
- self.segments.append(segment)
-
- def layout(self, base_vaddr):
- """Do the actual layout for final executable."""
-
- virt_addr = base_vaddr
- file_offset = 0
- self.virt_addr = base_vaddr
- self.file_offset = file_offset
- for s in self.segments:
- virt_addr += s.align
- s.virt_addr = virt_addr
- s.file_offset = file_offset
- s.layout()
- virt_addr += s.logical_size
- file_offset += s.physical_size
-
- def toBinArray(self):
- ba = BinArray()
- for s in self.segments:
- ba.extend(s.toBinArray())
- return ba
-
-
-#--------------------------------------------------------------------------
-# Elf file header
-#--------------------------------------------------------------------------
-
-class Elf64_eident(object):
- """Detailed representation for the Elf identifier."""
- format = "16B"
- size = struct.calcsize(format)
- physical_size = size
- logical_size = size
-
- def __init__(self, rawdata=None):
- object.__init__(self)
- if rawdata:
- self.fromBinArray(rawdata)
-
- def fromBinArray(self, rawdata):
- t = struct.unpack(self.format, rawdata)
- self.ei_magic = rawdata[:4]
- self.ei_class = ElfClass(rawdata[4])
- self.ei_data = ElfData(rawdata[5])
- self.ei_version = ElfVersion(rawdata[6])
- self.ei_osabi = ElfOsAbi(rawdata[7])
- self.ei_abiversion = 0
- self.ei_pad = [0, 0, 0, 0, 0, 0, 0]
-
- def make_default_amd64(self):
- self.ei_magic = BinArray([0x7f, 0x45, 0x4c, 0x46])
- self.ei_class = ELFCLASS64
- self.ei_data = ELFDATA2LSB
- self.ei_version = EV_CURRENT
- self.ei_osabi = ELFOSABI_SYSV
- self.ei_abiversion = 0
- self.ei_pad = [0, 0, 0, 0, 0, 0, 0]
-
- def toBinArray(self):
- ba = BinArray(self.ei_magic)
- ba.append(self.ei_class)
- ba.append(self.ei_data)
- ba.append(self.ei_version)
- ba.append(self.ei_osabi)
- ba.append(self.ei_abiversion)
- ba.extend(self.ei_pad)
- return ba
-
-
-class Elf64_Ehdr(object):
- """Elf file header"""
- format = "<16B 2H I 3Q I 6H"
- size = struct.calcsize(format)
- physical_size = size
- logical_size = size
-
- def __init__(self, rawdata=None):
- object.__init__(self)
- self.e_ident = Elf64_eident()
- self.e_type = ET_NONE
- self.e_machine = EM_X86_64
- self.e_version = EV_CURRENT
- self.e_entry = 0
- self.e_phoff = 0
- self.e_shoff = 0
- self.e_flags = 0
- self.e_ehsize = self.size
- self.e_phentsize = Elf64_Phdr.size
- self.e_phnum = 0
- self.e_shentsize = Elf64_Shdr.size
- self.e_shnum = 0
- self.e_shstrndx = 0
- if rawdata:
- self.fromBinArray(rawdata)
-
- def fromBinArray(self, rawdata):
- t = struct.unpack(self.format, rawdata)
- self.e_ident = Elf64_eident(BinArray(rawdata[:16]))
- self.e_type = ElfType(t[16])
- self.e_machine = ElfMachine(t[17])
- self.e_version = ElfVersion(t[18])
- self.e_entry = t[19]
- self.e_phoff = t[20]
- self.e_shoff = t[21]
- self.e_flags = t[22]
- self.e_ehsize = t[23]
- self.e_phentsize = t[24]
- self.e_phnum = t[25]
- self.e_shentsize = t[26]
- self.e_shnum = t[27]
- self.e_shstrndx = t[28]
-
- def toBinArray(self):
- # Build a list from e_ident and all other fields, to feed struct.pack.
- values = self.e_ident.toBinArray().tolist()
- values.extend([self.e_type, self.e_machine, self.e_version, self.e_entry,
- self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
- self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx])
- res = struct.pack(self.format, *values)
- return BinArray(res)
-
- def layout(self):
- pass
-
-
-#--------------------------------------------------------------------------
-# Elf Sections
-#--------------------------------------------------------------------------
-
class Elf64_Shdr(object):
  """Elf64 section header.

  Holds the ten fields unpacked with `format` and exposes the section's
  data through the `content` property.
  """
  format = "<2I 4Q 2I 2Q"
  size = struct.calcsize(format)
  physical_size = logical_size = size

  def __init__(self, index=None, rawdata=None):
    """Remember `index` and, when `rawdata` is given, parse it."""
    object.__init__(self)
    self.index = index
    if rawdata:
      self.fromBinArray(rawdata)

  def fromBinArray(self, rawdata):
    """Set the sh_* attributes from the packed header in `rawdata`."""
    fields = struct.unpack(self.format, rawdata)
    self.sh_name = fields[0]
    self.sh_type = ElfShType(fields[1])
    (self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size,
     self.sh_link, self.sh_info, self.sh_addralign,
     self.sh_entsize) = fields[2:]

  def resolve_names(self):
    """Ask the content object to resolve its names against self.owner."""
    # NOTE(review): `owner` is never assigned in this class; presumably the
    # object that creates the header sets it -- confirm against callers.
    section = self.content
    section.resolve_names(self.owner)

  @nested_property
  def content():
    def fget(self):
      """The section object built by the last assignment to `content`."""
      return self._content
    def fset(self, data):
      """Use the Section factory to get the subclass corresponding to the
      section type specified in this header."""
      self._content = Section(self, data)
    return locals()
-
-# For sections that contain elements of specific types :
-
class Elf64_Sym(object):
  """Symbol table entry.

  st_binding, st_type and st_visibility are views on bit fields of
  st_info and st_other.
  """
  format = "<I 2B H 2Q "
  entsize = struct.calcsize(format)

  def __init__(self, rawdata=None):
    """Parse `rawdata` when provided; otherwise no field is set."""
    object.__init__(self)
    if rawdata:
      self.fromBinArray(rawdata)

  @nested_property
  def st_binding():
    def fget(self):
      """Upper four bits of st_info, wrapped in ElfSymbolBinding."""
      return ElfSymbolBinding((self.st_info >> 4) & 0x0f)
    def fset(self, value):
      high = (value & 0x0f) << 4
      self.st_info = high | (self.st_info & 0x0f)
    return locals()

  @nested_property
  def st_type():
    def fget(self):
      """Lower four bits of st_info, wrapped in ElfSymbolType."""
      return ElfSymbolType(self.st_info & 0x0f)
    def fset(self, value):
      kept = self.st_info & 0xf0
      self.st_info = kept | (value & 0x0f)
    return locals()

  @nested_property
  def st_visibility():
    def fget(self):
      """Lower two bits of st_other, wrapped in ElfSymbolVisibility."""
      return ElfSymbolVisibility(self.st_other & 0x03)
    def fset(self, value):
      kept = self.st_other & 0xfc
      self.st_other = kept | (value & 0x03)
    return locals()

  def fromBinArray(self, rawdata):
    """Set the st_* attributes from one packed symbol entry."""
    name, info, other, shndx, value, size = struct.unpack(self.format,
                                                          rawdata)
    self.st_name = name  # index in the strtab pointed by sh_link
    self.st_info = info
    self.st_other = other
    self.st_shndx = ElfSectionIndex(shndx)
    self.st_value = value
    self.st_size = size
-
-
class Elf64_Rel(object):
  """Relocation entry without an explicit addend.

  r_sym and r_type are views on the upper and lower 32 bits of r_info.
  """
  format = "<2Q"

  def __init__(self, rawdata=None):
    """Create an entry, parsing `rawdata` when it is provided.

    All fields start at zero so that the r_sym / r_type setters can be
    used on an entry that was not built from raw data.
    """
    object.__init__(self)
    self.r_offset = 0
    self.r_info = 0
    self.r_addend = 0 # No addend in a Rel.
    if rawdata:
      self.fromBinArray(rawdata)

  def fromBinArray(self, rawdata):
    """Set r_offset and r_info from one packed entry."""
    # The first parameter used to be misspelled `sef`, which made every
    # call fail with a NameError on `self`.
    t = struct.unpack(self.format, rawdata)
    self.r_offset = t[0]
    self.r_info = t[1]

  @nested_property
  def r_sym():
    def fget(self):
      """Upper 32 bits of r_info."""
      return (self.r_info >> 32) & 0xffffffff
    def fset(self, value):
      self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff)
    return locals()

  @nested_property
  def r_type():
    def fget(self):
      """Lower 32 bits of r_info, wrapped in Amd64Relocation."""
      return Amd64Relocation(self.r_info & 0xffffffff)
    def fset(self, value):
      self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff)
    return locals()
-
-
class Elf64_Rela(Elf64_Rel):
  """Relocation entry that carries a signed addend after r_info."""
  format = "<2Q q"

  def __init__(self, rawdata=None):
    """Delegate to Elf64_Rel, which parses `rawdata` when provided."""
    Elf64_Rel.__init__(self, rawdata)

  def fromBinArray(self, rawdata):
    """Set r_offset, r_info and r_addend from one packed entry."""
    offset, info, addend = struct.unpack(self.format, rawdata)
    self.r_offset = offset
    self.r_info = info
    self.r_addend = addend
-
-
class Elf64_Dyn(object):
  """Dynamic table entry: a tag and a value.

  d_ptr is an alias that reads and writes d_val.
  """
  format = "<2Q"
  size = struct.calcsize(format)

  def __init__(self, tag, value):
    """Store `tag` in d_tag and `value` in d_val."""
    object.__init__(self)
    self.d_tag, self.d_val = tag, value

  @nested_property
  def d_ptr():
    def fget(self):
      """Same storage as d_val."""
      return self.d_val
    def fset(self, value):
      self.d_val = value
    return locals()
-
-
-# Sections types :
-
def Section(shdr, data=None):
  """A section factory.

  Returns an instance of the class registered for shdr.sh_type, built
  with (shdr, data). Unregistered types get a plain BaseSection.
  """
  by_type = {
    SHT_NULL: SNull,
    SHT_PROGBITS: SProgBits,
    SHT_SYMTAB: SSymtab,
    SHT_STRTAB: SStrtab,
    SHT_RELA: SRela,
    SHT_HASH: SHash,
    SHT_DYNAMIC: SDynamic,
    SHT_NOTE: SNote,
    SHT_NOBITS: SNobits,
    SHT_REL: SRel,
    SHT_SHLIB: SShlib,
    SHT_DYNSYM: SDynsym,
  }
  section_class = by_type.get(shdr.sh_type, BaseSection)
  return section_class(shdr, data)
-
-
class BaseSection(object):
  """Generic section content: an opaque block of bytes plus its header.

  Subclasses override fromBinArray / resolve_names to interpret the data.
  """

  def __init__(self, shdr, data=None):
    """Attach the header `shdr` and load `data` when it is non-empty."""
    object.__init__(self)
    self.data = None
    self.header = shdr
    if data:
      self.fromBinArray(data)

  def fromBinArray(self, data):
    """Keep a reference to `data` as the section's raw content."""
    self.data = data

  def toBinArray(self):
    """Return the raw content, or an empty BinArray when there is none."""
    if self.data:
      return self.data
    else:
      return BinArray()

  def resolve_names(self, elf):
    """Nothing to resolve."""
    pass

  @nested_property
  def size():
    def fget(self):
      """Number of items in the raw content; 0 when no data was loaded."""
      # `data` stays None when the section was built without content (this
      # is always the case for SNull); len(None) would raise a TypeError,
      # while toBinArray() already treats that case as empty.
      if self.data is None:
        return 0
      return len(self.data)
    return locals()
  physical_size = size
  logical_size = size

  def layout(self):
    """No layout work for a plain section."""
    pass
-
-
class SNull(BaseSection):
  """Content for SHT_NULL sections; never holds any data."""

  def __init__(self, shdr, data=None):
    # `data` is accepted to match the factory's call but is discarded:
    # the base class is always initialized without content.
    BaseSection.__init__(self, shdr)
-
-
class SProgBits(BaseSection):
  """Content for SHT_PROGBITS sections; the data is kept as raw bytes."""

  def __init__(self, shdr, data=None):
    BaseSection.__init__(self, shdr, data=data)
-
-
class SSymtab(BaseSection):
  """Symbol table section: the raw data split into Elf64_Sym entries."""
  entsize = struct.calcsize(Elf64_Sym.format)

  def __init__(self, shdr, data=None):
    """Start with an empty table; the base class parses `data` if given."""
    self.symtab = []
    BaseSection.__init__(self, shdr, data)

  def fromBinArray(self, data):
    """Keep the raw data and append one Elf64_Sym per complete entry."""
    BaseSection.fromBinArray(self, data)
    # Floor division: the count must be an integer for range(), and bytes
    # that do not make up a complete trailing entry are ignored.
    nument = len(data) // self.entsize
    for i in range(nument):
      start = i * self.entsize
      end = start + self.entsize
      self.symtab.append(Elf64_Sym(data[start:end]))

  def resolve_names(self, elf):
    """Give every symbol a `name` looked up in the linked string table."""
    # For a symtab, the strtab is indicated by sh_link
    strtab = elf.shdrs[self.header.sh_link].content
    # Resolve for all symbols in the table
    for sym in self.symtab:
      sym.name = strtab[sym.st_name]

  def __getitem__(self, key):
    """Return the symbol(s) at `key`, as list indexing would."""
    return self.symtab[key]
-
-
class SStrtab(BaseSection):
  """String table section, indexed by the byte offset of each string."""

  def __init__(self, shdr, data=None):
    """Start with an empty offset map; the base class parses `data`."""
    self.strtab = {}
    BaseSection.__init__(self, shdr, data)

  def fromBinArray(self, data):
    """Keep the raw data and record each NUL-separated string by offset."""
    BaseSection.fromBinArray(self, data)
    offset = 0
    for name in data.tostring().split('\0'):
      self.strtab[offset] = name
      # Skip the string and the NUL that terminated it.
      offset += len(name) + 1

  def __getitem__(self, key):
    """Return the string starting at offset `key`.

    Offsets that were not recorded while parsing are read from the raw
    data up to the next NUL, then cached for later lookups.
    """
    try:
      return self.strtab[key]
    except KeyError:
      pass
    tail = self.data[key:].tostring()
    value = tail.split('\0')[0]
    self.strtab[key] = value
    return value

  def iteritems(self):
    """Iterate over the (offset, string) pairs known so far."""
    return self.strtab.iteritems()
-
-
class SRela(BaseSection):
  """Relocation section with addends: a list of Elf64_Rela entries."""
  entsize = struct.calcsize(Elf64_Rela.format)

  def __init__(self, shdr, data=None):
    """Start with an empty table; the base class parses `data` if given."""
    self.relatab = []
    BaseSection.__init__(self, shdr, data)

  def fromBinArray(self, data):
    """Keep the raw data and append one Elf64_Rela per complete entry."""
    BaseSection.fromBinArray(self, data)
    # Floor division: the count must be an integer for range(), and bytes
    # that do not make up a complete trailing entry are ignored.
    nument = len(data) // self.entsize
    for i in range(nument):
      start = i * self.entsize
      end = start + self.entsize
      self.relatab.append(Elf64_Rela(data[start:end]))

  def resolve_names(self, elf):
    """Badly named, this will resolve to a symtab entry..."""
    # sh_link leads to the symtab
    self.symtab = elf.shdrs[self.header.sh_link].content
    # sh_info links to the section on which the relocation applies
    self.header.target = elf.shdrs[self.header.sh_info]
    for r in self.relatab:
      r.symbol = self.symtab[r.r_sym]
-
-
-
class SHash(BaseSection):
  """Content for SHT_HASH sections; behaves exactly like BaseSection."""
-
-
class SDynamic(BaseSection):
  """Content for SHT_DYNAMIC sections; behaves exactly like BaseSection."""
-
-
class SNote(BaseSection):
  """Content for SHT_NOTE sections; behaves exactly like BaseSection."""
-
-
class SNobits(BaseSection):
  """Content for SHT_NOBITS sections.

  Reports no size and no physical size, and serializes to nothing; only
  logical_size reflects the header's sh_size.
  """
  size = physical_size = 0

  @nested_property
  def logical_size():
    def fget(self):
      """The sh_size recorded in the section header."""
      return self.header.sh_size
    return locals()

  def toBinArray(self):
    """Always return an empty BinArray."""
    return BinArray()
-
class SRel(BaseSection):
  """Content for SHT_REL sections; behaves exactly like BaseSection."""
-
-
class SShlib(BaseSection):
  """Content for SHT_SHLIB sections; behaves exactly like BaseSection."""
-
-
class SDynsym(SSymtab):
  """Content for SHT_DYNSYM sections; behaves exactly like SSymtab."""
-
-
class Elf64_Phdr(object):
  """Elf64 program header entry.

  A new entry is a PT_NULL segment with all three permission flags set,
  zero offsets, addresses and sizes, and an alignment of 1.
  """
  format = "<2I 6Q"
  size = struct.calcsize(format)
  physical_size = logical_size = size

  def __init__(self):
    object.__init__(self)
    self.p_type = PT_NULL
    self.p_flags = PF_X + PF_W + PF_R
    self.p_offset = self.p_vaddr = self.p_paddr = 0
    self.p_filesz = self.p_memsz = 0
    self.p_align = 1

  def toBinArray(self):
    """Return the entry packed with `format`, as a BinArray."""
    fields = (self.p_type, self.p_flags, self.p_offset, self.p_vaddr,
              self.p_paddr, self.p_filesz, self.p_memsz, self.p_align)
    return BinArray(struct.pack(self.format, *fields))

  def layout(self):
    """No layout work for a program header."""
    return None
-
-
class BaseSegment(object):
  """An ordered collection of content objects making up a segment."""

  def __init__(self, align=0):
    """Create an empty segment with the given alignment value."""
    object.__init__(self)
    self.content = []
    self.align = align

  def add_content(self, content):
    """Append `content` after everything already in the segment."""
    self.content.append(content)

  def toBinArray(self):
    """Concatenate the binary form of every content object, in order."""
    result = BinArray()
    for item in self.content:
      result.extend(item.toBinArray())
    return result

  @nested_property
  def size():
    def fget(self):
      """Sum of the `size` of every content object."""
      return sum([item.size for item in self.content])
    return locals()
  physical_size = size
  logical_size = size
-
-
class TextSegment(BaseSegment):
  """Segment whose content objects are laid out back to back."""

  def __init__(self, align=0):
    BaseSegment.__init__(self, align=align)

  def layout(self):
    """Assign an address and file offset to each content object.

    Starts from this segment's own virt_addr / file_offset (which must
    already be set) and advances by each item's logical and physical
    size after that item has laid itself out.
    """
    next_addr = self.virt_addr
    next_offset = self.file_offset
    for item in self.content:
      item.virt_addr = next_addr
      item.file_offset = next_offset
      item.layout()
      next_addr = next_addr + item.logical_size
      next_offset = next_offset + item.physical_size
-
-
class DataSegment(BaseSegment):
  """Segment with regular content followed by file-less `nobits` items."""

  def __init__(self, align=0):
    BaseSegment.__init__(self, align=align)
    self.nobits = []

  def add_nobits(self, content):
    """Append `content` to the items that take no room in the file."""
    self.nobits.append(content)

  def layout(self):
    """Assign addresses and file offsets to all items.

    Regular content is placed first, advancing both the address and the
    file offset. The nobits items continue from the resulting address
    but always get a file offset of 0.
    """
    next_addr = self.virt_addr
    next_offset = self.file_offset
    for item in self.content:
      item.virt_addr = next_addr
      item.file_offset = next_offset
      item.layout()
      next_addr = next_addr + item.logical_size
      next_offset = next_offset + item.physical_size
    for item in self.nobits:
      item.virt_addr = next_addr
      item.file_offset = 0
      item.layout()
      next_addr = next_addr + item.logical_size

  @nested_property
  def logical_size():
    def fget(self):
      """Physical size plus the logical size of every nobits item."""
      extra = sum([item.logical_size for item in self.nobits])
      return self.physical_size + extra
    return locals()
-
-
-
class PStrtab(object):
  """A string table built incrementally, for the produced file.

  `table` holds (offset, string) pairs where each stored string already
  includes its terminating NUL.
  """

  def __init__(self):
    object.__init__(self)
    self.table = []
    self.virt_addr = None

  def append(self, string):
    """Add `string` to the table and return the offset it was stored at."""
    # The next free offset is exactly the current size of the table.
    offset = self.size
    new_str = string + '\0'
    self.table.append((offset, new_str))
    return offset

  @nested_property
  def size():
    def fget(self):
      """Total length of the stored strings; 0 for an empty table."""
      # An empty table has no last entry to measure from; indexing it
      # would raise an IndexError.
      if not self.table:
        return 0
      return (self.table[-1][0] + len(self.table[-1][1]))
    return locals()
  physical_size = size
  logical_size = size

  def toBinArray(self):
    """Return all stored strings, NULs included, as one BinArray."""
    ba = BinArray()
    for s in (i[1] for i in self.table):
      ba.fromstring(s)
    return ba

  def layout(self):
    """No layout work for the string table."""
    pass
-
-
class Dynamic(object):
  """The dynamic table of the produced file, with its string table.

  `dyntab` is a list of (tag, value) pairs. The serialized table ends
  with a DT_NULL tag that has no associated value.
  """

  def __init__(self):
    object.__init__(self)
    self.dyntab = []
    self.strtab = PStrtab()

  @nested_property
  def size():
    def fget(self):
      """Size of the serialized table, terminator included."""
      # End the table with a DT_NULL without associated value.
      return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q"))
    return locals()
  physical_size = size
  logical_size = size

  def add_shlib(self, shlib):
    """Record `shlib` in the string table and add a DT_NEEDED entry."""
    offset = self.strtab.append(shlib)
    self.dyntab.append((DT_NEEDED, offset))

  def add_symtab(self, vaddr):
    """Add a DT_SYMTAB entry with the value `vaddr`."""
    self.dyntab.append((DT_SYMTAB, vaddr))

  def add_debug(self):
    """Add a DT_DEBUG entry with a zero value."""
    self.dyntab.append((DT_DEBUG, 0))

  def layout(self):
    """Record the string table's address in a DT_STRTAB entry.

    Exits the program with status 1 when the string table has no
    address yet. Calling layout() again updates the existing entry
    instead of adding a second one.
    """
    import sys
    if self.strtab.virt_addr is None:
      # Report on stderr and leave through sys.exit: the bare `exit`
      # helper is only provided by the site module.
      sys.stderr.write("Ooops, strtab's address is not known yet. Aborting.\n")
      sys.exit(1)
    entry = (DT_STRTAB, self.strtab.virt_addr)
    for i, d in enumerate(self.dyntab):
      if d[0] == DT_STRTAB:
        # Keep the entry in place so the other entries do not move.
        self.dyntab[i] = entry
        return
    self.dyntab.append(entry)

  @nested_property
  def dt_debug_address():
    def fget(self):
      """Address of the value of the first DT_DEBUG entry, or None."""
      for i, d in enumerate(self.dyntab):
        if d[0] == DT_DEBUG:
          # Skip the preceding entries, then the tag of this one.
          return self.virt_addr + (i * Elf64_Dyn.size + struct.calcsize("<Q"))
      return None
    return locals()

  def toBinArray(self):
    """Return every entry packed in order, followed by a DT_NULL tag."""
    ba = BinArray()
    for i in self.dyntab:
      s = struct.pack(Elf64_Dyn.format, i[0], i[1])
      ba.fromstring(s)
    null = struct.pack("<Q", DT_NULL)
    ba.fromstring(null)
    return ba
-
-
class Interpreter(object):
  """The interpreter path stored in the produced file, NUL terminated."""
  default_interpreter = "/lib64/ld-linux-x86-64.so.2"

  def __init__(self, interpreter=None):
    """Use `interpreter`, or the class default when it is empty or None."""
    object.__init__(self)
    self.interpreter = interpreter or self.default_interpreter

  @nested_property
  def size():
    def fget(self):
      """Length of the path plus one for the terminating NUL."""
      # Null terminated
      return 1 + len(self.interpreter)
    return locals()
  physical_size = size
  logical_size = size

  def toBinArray(self):
    """Return the path followed by a single zero byte, as a BinArray."""
    result = BinArray(self.interpreter)
    result.append(0)
    return result

  def layout(self):
    """No layout work for the interpreter path."""
    return None