From: Amand Tihon Date: Fri, 22 May 2009 00:46:53 +0000 (+0200) Subject: Cleaning and refactoring. X-Git-Tag: bold-0.1.0~8 X-Git-Url: https://git.alrj.org/?a=commitdiff_plain;h=7eeaa837d3a6f29f9312bf29962214e709663e52;p=bold.git Cleaning and refactoring. Moved the link work to a Linker class. Added exception handling in main. Make the Dynamic section use SStrtab and Elf64_Dyn types instead of its own types. Move all errors in a dedicated file. Check for undefined symbols. Check that input files are relocatable objects and of a supported type. Bits of source cleaning. --- diff --git a/Bold/BinArray.py b/Bold/BinArray.py new file mode 100644 index 0000000..84d5725 --- /dev/null +++ b/Bold/BinArray.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; + +# Copyright (C) 2009 Amand 'alrj' Tihon +# +# This file is part of bold, the Byte Optimized Linker. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. + +from array import array +import struct + +class BinArray(array): + """A specialized array that contains bytes""" + def __new__(cls, data=None): + if data: + return array.__new__(BinArray, "B", data) + else: + return array.__new__(BinArray, "B") diff --git a/Bold/__init__.py b/Bold/__init__.py new file mode 100644 index 0000000..f090c5a --- /dev/null +++ b/Bold/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- + +# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; + +# Copyright (C) 2009 Amand 'alrj' Tihon +# +# This file is part of bold, the Byte Optimized Linker. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. + +""" + Empty file in place. + Utilities lies in here, + Ready to be used. +""" \ No newline at end of file diff --git a/Bold/constants.py b/Bold/constants.py new file mode 100644 index 0000000..72d0100 --- /dev/null +++ b/Bold/constants.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- + +# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; + +# Copyright (C) 2009 Amand 'alrj' Tihon +# +# This file is part of bold, the Byte Optimized Linker. +# Heavily inspired by elf.h from the GNU C Library. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. + +"""This file defines standard ELF constants.""" + +class SymbolicConstant(long): + """Allows you to map a symbolic name with a given integer.""" + _symbolics = {} + _default = None + def __new__(cls, value, symbolic=None): + if symbolic: + cls._symbolics[value] = symbolic + return long.__new__(cls, value) + + def __str__(self): + if long(self) in self._symbolics: + return self._symbolics[long(self)] + elif self._default: + return self._default % long(self) + else: + return str(long(self)) + + +class ElfClass(SymbolicConstant): + _symbolics = {} +ELFCLASSNONE = ElfClass(0, "Invalid ELF class") +ELFCLASS32 = ElfClass(1, "ELF32") +ELFCLASS64 = ElfClass(2, "ELF64") + + +class ElfData(SymbolicConstant): + _symbolics = {} +ELFDATANONE = ElfData(0, "Invalid data encoding") +ELFDATA2LSB = ElfData(1, "Little endian") +ELFDATA2MSB = ElfData(2, "Big endian") + + +class ElfVersion(SymbolicConstant): + _symbolics = {} +EV_NONE = ElfVersion(0, "Invalid ELF version") +EV_CURRENT = ElfVersion(1, "Current version (1)") + + +class ElfOsAbi(SymbolicConstant): + _symbolics = {} +# Fill me +ELFOSABI_NONE = ElfOsAbi(0, "UNIX - System V") +ELFOSABI_SYSV = ElfOsAbi(0, "UNIX - System V") + + +class ElfType(SymbolicConstant): + _symbolics = {} +ET_NONE = ElfType(0, "No file type") +ET_REL = ElfType(1, "Relocatable file") +ET_EXEC = ElfType(2, "Executable file") +ET_DYN = ElfType(3, "Shared object file") +ET_CORE = ElfType(4, "Core file") + + +class ElfMachine(SymbolicConstant): + _symbolics = {} +# Fill me +EM_NONE = ElfMachine(0, "No machine") +EM_386 = ElfMachine(3, "Intel 80386") +EM_X86_64 = ElfMachine(62, "AMD x86-64 architecture") + +class ElfSectionIndex(SymbolicConstant): + _symbolics = {} +SHN_UNDEF = ElfSectionIndex(0, "UND") +SHN_ABS = ElfSectionIndex(0xfff1, "ABS") +SHN_COMMON = ElfSectionIndex(0xfff2, "COM") + +class ElfShType(SymbolicConstant): + _symbolics = {} +SHT_NULL = ElfShType(0, "NULL") +SHT_PROGBITS = ElfShType(1, "PROGBITS") +SHT_SYMTAB = ElfShType(2, "SYMTAB") +SHT_STRTAB = ElfShType(3, "STRTAB") +SHT_RELA = ElfShType(4, "RELA") +SHT_HASH = ElfShType(5, "HASH") +SHT_DYNAMIC = ElfShType(6, "DYNAMIC") +SHT_NOTE = ElfShType(7, "NOTE") +SHT_NOBITS = ElfShType(8, "NOBITS") +SHT_REL = ElfShType(9, "REL") +SHT_SHLIB = ElfShType(10, "SHLIB") +SHT_DYNSYM = ElfShType(11, "DYNSYM") + +SHF_WRITE = 0x1 +SHF_ALLOC = 1 << 1 +SHF_EXECINSTR = 1 << 2 +SHF_MERGE = 1 << 4 +SHF_STRINGS = 1 << 5 +SHF_INFO_LINK = 1 << 6 +SHF_LINK_ORDER = 1 << 7 +SHF_OS_NONCONFORMING = 1 << 8 +SHF_GROUP = 1 << 9 +SHF_TLS = 1 << 10 +SHF_MASKOS = 0x0f00000 +SHF_MASKPROC = 0xf000000 + +STN_UNDEF = 0 + + +class ElfSymbolBinding(SymbolicConstant): + _symbolics = {} +STB_LOCAL = ElfSymbolBinding(0, "LOCAL") +STB_GLOBAL = ElfSymbolBinding(1, "GLOBAL") +STB_WEAK = ElfSymbolBinding(2, "WEAK") + + +class ElfSymbolType(SymbolicConstant): + _symbolics = {} +STT_NOTYPE = ElfSymbolType(0, "NOTYPE") +STT_OBJECT = ElfSymbolType(1, "OBJECT") +STT_FUNC = ElfSymbolType(2, "FUNC") +STT_SECTION = ElfSymbolType(3, "SECTION") +STT_FILE = ElfSymbolType(4, "FILE") +STT_COMMON = ElfSymbolType(5, "COMMON") +STT_TLS = ElfSymbolType(6, "TLS") + + +class ElfSymbolVisibility(SymbolicConstant): + _symbolics = {} +STV_DEFAULT = ElfSymbolVisibility(0, "DEFAULT") +STV_INTERNAL = ElfSymbolVisibility(1, "INTERN") +STV_HIDDEN = ElfSymbolVisibility(2, "HIDDEN") +STV_PROTECTED = ElfSymbolVisibility(3, "PROTECTED") + + +class ElfPhType(SymbolicConstant): + _symbolics = {} +PT_NULL = ElfPhType(0, "NULL") +PT_LOAD = ElfPhType(1, "LOAD") +PT_DYNAMIC = ElfPhType(2, "DYNAMIC") +PT_INTERP = ElfPhType(3, "INTERP") +PT_NOTE = ElfPhType(4, "NOTE") +PT_SHLIB = ElfPhType(5, "SHLIB") +PT_PHDR = ElfPhType(6, "PHDR") +PT_TLS = ElfPhType(7, "TLS") + +PF_X = (1 << 0) +PF_W = (1 << 1) +PF_R = (1 << 2) + +class ElfDynamicType(SymbolicConstant): + _symbolics = {} + _default = "Unknown (0x%x)" +DT_NULL = ElfDynamicType(0, "NULL") +DT_NEEDED = ElfDynamicType(1, "NEEDED") +DT_PLTRELSZ = ElfDynamicType(2, "PLTRELSZ") +DT_PLTGOT = ElfDynamicType(3, "PLTGOT") +DT_HASH = ElfDynamicType(4, "HASH") +DT_STRTAB = ElfDynamicType(5, "STRTAB") +DT_SYMTAB = ElfDynamicType(6, "SYMTAB") +DT_RELA = ElfDynamicType(7, "RELA") +DT_RELASZ = ElfDynamicType(8, "RELASZ") +DT_RELAENT = ElfDynamicType(9, "RELAENT") +DT_STRSZ = ElfDynamicType(10, "STRSZ") +DT_SYMENT = ElfDynamicType(11, "SYMENT") +DT_INIT = ElfDynamicType(12, "INIT") +DT_FINI = ElfDynamicType(13, "FINI") +DT_SONAME = ElfDynamicType(14, "SONAME") +DT_RPATH = ElfDynamicType(15, "RPATH") +DT_SYMBOLIC = ElfDynamicType(16, "SYMBOLIC") +DT_REL = ElfDynamicType(17, "REL") +DT_RELSZ = ElfDynamicType(18, "RELSZ") +DT_RELENT = ElfDynamicType(19, "RELENT") +DT_PLTREL = ElfDynamicType(20, "PLTREL") +DT_DEBUG = ElfDynamicType(21, "DEBUG") +DT_TEXTREL = ElfDynamicType(22, "TEXTREL") +DT_JMPREL = ElfDynamicType(23, "JMPREL") +DT_BIND_NOW = ElfDynamicType(24, "BIND_NOW") +DT_INIT_ARRAY = ElfDynamicType(25, "INIT_ARRAY") +DT_FINI_ARRAY = ElfDynamicType(26, "FINI_ARRAY") +DT_INIT_ARRAYSZ = ElfDynamicType(27, "INIT_ARRAYSZ") +DT_FINI_ARRAYSZ = ElfDynamicType(28, "FINI_ARRAYSZ") +DT_RUNPATH = ElfDynamicType(29, "RUNPATH") +DT_FLAGS = ElfDynamicType(30, "FLAGS") +DT_ENCODING = ElfDynamicType(31, "ENCODING") +DT_PREINIT_ARRAY = ElfDynamicType(32, "PREINIT_ARRAY") +DT_PREINIT_ARRAYSZ = ElfDynamicType(33, "PREINIT_ARRAYSZ") + +# AMD x86-64 relocations +class Amd64Relocation(SymbolicConstant): + _symbolics = {} + +R_X86_64_NONE = Amd64Relocation(0, "NONE") +R_X86_64_64 = Amd64Relocation(1, "64") +R_X86_64_PC32 = Amd64Relocation(2, "PC32") +R_X86_64_GOT32 = Amd64Relocation(3, "GOT32") +R_X86_64_PLT32 = Amd64Relocation(4, "PLT32") +R_X86_64_COPY = Amd64Relocation(5, "COPY") +R_X86_64_GLOB_DAT = Amd64Relocation(6, "GLOB_DAT") +R_X86_64_JUMP_SLOT = Amd64Relocation(7, "JUMP_SLOT") +R_X86_64_RELATIVE = Amd64Relocation(8, "RELATIVE") +R_X86_64_GOTPCREL = Amd64Relocation(9, "GOTPCREL") +R_X86_64_32 = Amd64Relocation(10, "32") +R_X86_64_32S = Amd64Relocation(11, "32S") +R_X86_64_16 = Amd64Relocation(12, "16") +R_X86_64_PC16 = Amd64Relocation(13, "PC16") +R_X86_64_8 = Amd64Relocation(14, "8") +R_X86_64_PC8 = Amd64Relocation(15, "PC8") +R_X86_64_DTPMOD64 = Amd64Relocation(16, "DTPMOD64") +R_X86_64_DTPOFF64 = Amd64Relocation(17, "DTPOFF64") +R_X86_64_TPOFF64 = Amd64Relocation(18, "TPOFF64") +R_X86_64_TLSGD = Amd64Relocation(19, "TLSGD") +R_X86_64_TLSLD = Amd64Relocation(20, "TLSLD") +R_X86_64_DTPOFF32 = Amd64Relocation(21, "DTPOFF32") +R_X86_64_GOTTPOFF = Amd64Relocation(22, "GOTTPOFF") +R_X86_64_TPOFF32 = Amd64Relocation(23, "TPOFF32") + + + + + + + + + + + + + + + + + + + + + diff --git a/Bold/elf.py b/Bold/elf.py new file mode 100644 index 0000000..065be52 --- /dev/null +++ b/Bold/elf.py @@ -0,0 +1,867 @@ +# -*- coding: utf-8 -*- + +# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; + +# Copyright (C) 2009 Amand 'alrj' Tihon +# +# This file is part of bold, the Byte Optimized Linker. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. + + +from BinArray import BinArray +from constants import * +from errors import * +import struct + +# Helpful decorator +def nested_property(c): + return property(**c()) + +#-------------------------------------------------------------------------- +# Elf +#-------------------------------------------------------------------------- + +class Elf64(object): + """Handles an Elf64 object.""" + interpreter = "/lib64/ld-linux-x86-64.so.2" + + def __init__(self, path=None): + object.__init__(self) + self.header = Elf64_Ehdr() + self.header.owner = self + self.shdrs = [] + self.phdrs = [] + self.shlibs = [] + self.sections = {} + self.segments = [] + self.local_symbols = {} + self.global_symbols = {} + self.undefined_symbols = [] + + if path: + self.fromfile(path) + + # Functions for relocatables files used as input + + def fromfile(self, path): + f = file(path, "rb") + + # Load Elf header + data = BinArray() + data.fromfile(f, Elf64_Ehdr.size) + self.header.fromBinArray(data) + + # This linker only supports relocatable objects + if self.header.e_type != ET_REL: + raise NotRelocatableObject(path) + + if self.header.e_ident.ei_class != ELFCLASS64: + raise UnsupportedObject(path, "Not %s" % ELFCLASS64) + + if self.header.e_machine != EM_X86_64: + raise UnsupportedObject(path, "Not %s" % EM_X86_64) + + # Load sections headers + f.seek(self.header.e_shoff) + for i in range(self.header.e_shnum): + data = BinArray() + data.fromfile(f, self.header.e_shentsize) + h = Elf64_Shdr(i, data) + h.owner = self + self.shdrs.append(h) + + # Read sections content + for sh in self.shdrs: + data = BinArray() + if sh.sh_type != SHT_NOBITS: + f.seek(sh.sh_offset) + data.fromfile(f, sh.sh_size) + sh.content = data + + f.close() + + def resolve_names(self): + # The .shstrtab index is in Elf Header. find the sections names + strtab = self.shdrs[self.header.e_shstrndx].content + + for sh in self.shdrs: + sh.name = strtab[int(sh.sh_name)] + self.sections[sh.name] = sh + + # And resolve names in the section itself + sh.resolve_names() + + + def find_symbols(self): + for sh in self.shdrs: + if sh.sh_type == SHT_SYMTAB: + symtab = sh.content.symtab + + for symbol in symtab: + if symbol.st_type == STT_FILE: + continue + if symbol.st_shndx == SHN_ABS: + continue + if symbol.st_shndx == SHN_UNDEF: + if symbol.name: + self.undefined_symbols.append(symbol.name) + continue + + target_section = self.shdrs[symbol.st_shndx] + + symbol_name = symbol.name + value = symbol.st_value + bind = symbol.st_binding + + # We got a name, a target section, and an offset in the section + if symbol.st_binding == STB_LOCAL: + if symbol.st_type == STT_SECTION: + symbol_name = target_section.name + self.local_symbols[symbol_name] = (target_section, value) + else: + self.global_symbols[symbol_name] = (target_section, value) + + def apply_relocation(self, all_global_symbols): + # find relocation tables + relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]] + for sh in relocations: + target = sh.target.content + + for reloc in sh.content.relatab: + + if reloc.symbol.st_shndx == SHN_UNDEF: + # This is an extern symbol, find it in all_global_symbols + sym_address = all_global_symbols[reloc.symbol.name] + else: + # source == in which section it is defined + source = self.shdrs[reloc.symbol.st_shndx].content + sym_address = source.virt_addr + reloc.symbol.st_value + + target_ba = target.data # The actual BinArray that we'll modify + pc_address = target.virt_addr + reloc.r_offset + + if reloc.r_type == R_X86_64_64: + format = "> 4) & 0x0f) + def fset(self, value): + self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f)) + return locals() + + @nested_property + def st_type(): + def fget(self): + return ElfSymbolType(self.st_info & 0x0f) + def fset(self, value): + self.st_info = ((self.st_info & 0xf0) | (value & 0x0f)) + return locals() + + @nested_property + def st_visibility(): + def fget(self): + return ElfSymbolVisibility(self.st_other & 0x03) + def fset(self, value): + self.st_other = ((self.st_other & 0xfc) | (value & 0x03)) + return locals() + + def fromBinArray(self, rawdata): + t = struct.unpack(self.format, rawdata) + self.st_name = t[0] # index in the strtab pointed by sh_link + self.st_info = t[1] + self.st_other = t[2] + self.st_shndx = ElfSectionIndex(t[3]) + self.st_value = t[4] + self.st_size = t[5] + + +class Elf64_Rel(object): + format = "<2Q" + def __init__(self, rawdata=None): + object.__init__(self) + self.r_addend = 0 # No addend in a Rel. + if rawdata is not None: + self.fromBinArray(rawdata) + + def fromBinArray(sef, rawdata): + t = struct.unpack(self.format, rawdata) + self.r_offset = t[0] + self.r_info = t[1] + + @nested_property + def r_sym(): + def fget(self): + return (self.r_info >> 32) & 0xffffffff + def fset(self, value): + self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff) + return locals() + + @nested_property + def r_type(): + def fget(self): + return Amd64Relocation(self.r_info & 0xffffffff) + def fset(self, value): + self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff) + return locals() + + +class Elf64_Rela(Elf64_Rel): + format = "<2Q q" + def __init__(self, rawdata=None): + Elf64_Rel.__init__(self, rawdata) + + def fromBinArray(self, rawdata): + t = struct.unpack(self.format, rawdata) + self.r_offset = t[0] + self.r_info = t[1] + self.r_addend = t[2] + + +class Elf64_Dyn(object): + format = "<2Q" + size = struct.calcsize(format) + def __init__(self, tag, value): + object.__init__(self) + self.d_tag = tag + self.d_val = value + + @nested_property + def d_ptr(): + def fget(self): + return self.d_val + def fset(self, value): + self.d_val = value + return locals() + + def toBinArray(self): + ba = BinArray() + ba.fromstring(struct.pack(self.format, self.d_tag, self.d_val)) + return ba + +# Sections types : + +def Section(shdr, data=None): + """A section factory""" + dataclass = { + SHT_NULL: SNull, + SHT_PROGBITS: SProgBits, + SHT_SYMTAB: SSymtab, + SHT_STRTAB: SStrtab, + SHT_RELA: SRela, + SHT_HASH: SHash, + SHT_DYNAMIC: SDynamic, + SHT_NOTE: SNote, + SHT_NOBITS: SNobits, + SHT_REL: SRel, + SHT_SHLIB: SShlib, + SHT_DYNSYM: SDynsym + } + if shdr.sh_type in dataclass: + return dataclass[shdr.sh_type](shdr, data) + else: + return BaseSection(shdr, data) + + +class BaseSection(object): + def __init__(self, shdr, rawdata=None): + object.__init__(self) + self.data = None + self.header = shdr + if rawdata is not None: + self.fromBinArray(rawdata) + + def fromBinArray(self, rawdata): + self.data = rawdata + + def toBinArray(self): + if self.data: + return self.data + else: + return BinArray() + + def resolve_names(self, elf): + """Nothing to resolve.""" + pass + + @nested_property + def size(): + def fget(self): + return len(self.data) + return locals() + physical_size = size + logical_size = size + + def layout(self): + pass + + +class SNull(BaseSection): + def __init__(self, shdr, data=None): + BaseSection.__init__(self, shdr, None) + + +class SProgBits(BaseSection): + def __init__(self, shdr, data=None): + BaseSection.__init__(self, shdr, data) + + +class SSymtab(BaseSection): + entsize = struct.calcsize(Elf64_Sym.format) + def __init__(self, shdr, data=None): + self.symtab = [] + BaseSection.__init__(self, shdr, data) + + def fromBinArray(self, data): + BaseSection.fromBinArray(self, data) + nument = len(data) / self.entsize + for i in range(nument): + start = i * self.entsize + end = i * self.entsize + self.entsize + self.symtab.append(Elf64_Sym(data[start:end])) + + def resolve_names(self, elf): + # For a symtab, the strtab is indicated by sh_link + strtab = elf.shdrs[self.header.sh_link].content + # Resolve for all symbols in the table + for sym in self.symtab: + sym.name = strtab[sym.st_name] + + def __getitem__(self, key): + return self.symtab[key] + + +class SStrtab(BaseSection): + """This one behaves in two completely different ways. + If it's given a section header and data, it will act as read-only, only to + be used for name resolution. + If it's not given any argument, it can be used to create a new Strtab.""" + def __init__(self, shdr=None, data=None): + self.readonly = (shdr is not None) + self.strtab = {} + self.table = [] + BaseSection.__init__(self, shdr, data) + self.virt_addr = None + + def toBinArray(self): + if self.readonly: + return BaseSection.toBinArray() + + ba = BinArray() + keys = self.strtab.keys() + keys.sort() + for k in keys: + ba.fromstring(self.strtab[k] + "\0") + return ba + + @nested_property + def size(): + def fget(self): + if self.readonly: + return len(data) + if len(self.strtab) == 0: + return 0 + return sum((len(x)+1 for x in self.strtab.values())) + return locals() + physical_size = size + logical_size = size + + def iteritems(self): + return self.strtab.iteritems() + + # Resolution functions + + def fromBinArray(self, data): + BaseSection.fromBinArray(self, data) + itab = data.tostring().split('\0') + i = 0 + for sname in itab: + self.strtab[i] = sname + i += len(sname) + 1 + + def __getitem__(self, key): + if key in self.strtab: + return self.strtab[key] + else: + v = self.data[key:].tostring().split('\0')[0] + self.strtab[key] = v + return v + + # Executable creation functions + + def append(self, string): + if len(self.strtab) == 0: + offset = 0 + else: + last = max(self.strtab.keys()) + offset = last + len(self.strtab[last]) + 1 # for the \0 + self.strtab[offset] = string + return offset + + def layout(self): + pass + + +class SRela(BaseSection): + entsize = struct.calcsize(Elf64_Rela.format) + def __init__(self, shdr, data=None): + self.relatab = [] + BaseSection.__init__(self, shdr, data) + + def fromBinArray(self, data): + BaseSection.fromBinArray(self, data) + nument = len(data) / self.entsize + for i in range(nument): + start = i * self.entsize + end = i * self.entsize + self.entsize + self.relatab.append(Elf64_Rela(data[start:end])) + + def resolve_names(self, elf): + """Badly named, this wil resolve to a symtab entry...""" + # sh_link leads to the symtab + self.symtab = elf.shdrs[self.header.sh_link].content + # sh_info links to the section on which the relocation applies + self.header.target = elf.shdrs[self.header.sh_info] + for r in self.relatab: + r.symbol = self.symtab[r.r_sym] + + + +class SHash(BaseSection): + pass + + +class SDynamic(BaseSection): + pass + + +class SNote(BaseSection): + pass + + +class SNobits(BaseSection): + size = 0 + physical_size = 0 + + @nested_property + def logical_size(): + def fget(self): + return self.header.sh_size + return locals() + + def toBinArray(self): + return BinArray() + +class SRel(BaseSection): + pass + + +class SShlib(BaseSection): + pass + + +class SDynsym(SSymtab): + pass + + +class Elf64_Phdr(object): + format = "<2I 6Q" + size = struct.calcsize(format) + physical_size = size + logical_size = size + + def __init__(self): + object.__init__(self) + self.p_type = PT_NULL + self.p_flags = PF_X + PF_W + PF_R + self.p_offset = 0 + self.p_vaddr = 0 + self.p_paddr = 0 + self.p_filesz = 0 + self.p_memsz = 0 + self.p_align = 1 + + def toBinArray(self): + res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset, + self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align) + return BinArray(res) + + def layout(self): + pass + + def update_from_content(self, content): + """ Update ofset, address and sizes. + After having applied layout(),the content knows all these values.""" + self.p_offset = content.file_offset + self.p_vaddr = content.virt_addr + self.p_filesz = content.physical_size + self.p_memsz = content.logical_size + + +class BaseSegment(object): + def __init__(self, align=0): + object.__init__(self) + self.align = align + self.content = [] + + def add_content(self, content): + self.content.append(content) + + def toBinArray(self): + ba = BinArray() + for c in self.content: + ba.extend(c.toBinArray()) + return ba + + @nested_property + def size(): + def fget(self): + return sum(c.size for c in self.content) + return locals() + physical_size = size + logical_size = size + + +class TextSegment(BaseSegment): + def __init__(self, align=0): + BaseSegment.__init__(self, align) + + def layout(self): + virt_addr = self.virt_addr + file_offset = self.file_offset + for i in self.content: + i.virt_addr = virt_addr + i.file_offset = file_offset + i.layout() + virt_addr += i.logical_size + file_offset += i.physical_size + + +class DataSegment(BaseSegment): + def __init__(self, align=0): + BaseSegment.__init__(self, align) + self.nobits = [] + + def add_nobits(self, content): + self.nobits.append(content) + + def layout(self): + virt_addr = self.virt_addr + file_offset = self.file_offset + for i in self.content: + i.virt_addr = virt_addr + i.file_offset = file_offset + i.layout() + virt_addr += i.logical_size + file_offset += i.physical_size + for i in self.nobits: + i.virt_addr = virt_addr + i.file_offset = 0 + i.layout() + virt_addr += i.logical_size + + @nested_property + def logical_size(): + def fget(self): + return self.physical_size + sum(c.logical_size for c in self.nobits) + return locals() + + +class Dynamic(object): + def __init__(self): + object.__init__(self) + self.dyntab = [] + self.strtab = SStrtab() + + @nested_property + def size(): + def fget(self): + # End the table with a DT_NULL without associated value. + return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q")) + return locals() + physical_size = size + logical_size = size + + def add_shlib(self, shlib): + offset = self.strtab.append(shlib) + self.dyntab.append(Elf64_Dyn(DT_NEEDED, offset)) + + def add_symtab(self, vaddr): + self.dyntab.append(Elf64_Dyn(DT_SYMTAB, vaddr)) + + def add_debug(self): + self.dyntab.append(Elf64_Dyn(DT_DEBUG, 0)) + + def layout(self): + # Adjust the address of the strtab, if + if self.strtab.virt_addr is None: + print "Ooops, strtab's address is not known yet. Aborting." + exit(1) + else: + self.dyntab.append(Elf64_Dyn(DT_STRTAB, self.strtab.virt_addr)) + + @nested_property + def dt_debug_address(): + def fget(self): + for i, d in enumerate(self.dyntab): + if d.d_tag == DT_DEBUG: + return self.virt_addr + (i*d.size + (d.size/2)) + return locals() + + + def toBinArray(self): + ba = BinArray() + for d in self.dyntab: + ba.extend(d.toBinArray()) + null = struct.pack(" +# +# This file is part of bold, the Byte Optimized Linker. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. + +"""Define all the exceptions.""" + +class NotRelocatableObject(Exception): + """Raised when an input file is not a relocatable ELF object.""" + def __init__(self, path): + self.path = path + def __str__(self): + return "File '%s' is not a relocatable object file" % self.path + +class UnsupportedObject(Exception): + """Raised when an input file is not of a supported arch.""" + def __init__(self, path, reason): + self.path = path + self.reason = reason + def __str__(self): + return "File '%s' is not supported: %s" % (self.path, self.reason) + +class LibNotFound(Exception): + """Raised if a shared library could not be found.""" + def __init__(self, libname): + self.libname = libname + def __str__(self): + return "Cannot find shared library for '%s'" % self.libname + +class UndefinedSymbol(Exception): + """Raised if a symbol is referenced but not declared.""" + def __init__(self, symbol_name): + self.symbol = symbol_name + def __str__(self): + return "Undefined reference to '%s'" % self.symbol + +class RedefinedSymbol(Exception): + """Raised if a symbol is referenced but not declared.""" + def __init__(self, symbol_name): + self.symbol = symbol_name + def __str__(self): + return "Symbol '%s' is declared twice" % self.symbol diff --git a/Bold/linker.py b/Bold/linker.py new file mode 100644 index 0000000..757bb1d --- /dev/null +++ b/Bold/linker.py @@ -0,0 +1,198 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; + +# Copyright (C) 2009 Amand 'alrj' Tihon +# +# This file is part of bold, the Byte Optimized Linker. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. + +""" +Main entry point for the bold linker. +""" + +from constants import * +from elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter +from errors import * +from ctypes.util import find_library + +class BoldLinker(object): + """A Linker object takes one or more objects files, optional shared libs, + and arranges all this in an executable. + + Important note: the external functions from the libraries are NOT resolved. + This import is left to the user, as it can be done more efficiently by hash. + (http://www.linuxdemos.org/contentarticle/how_to_start_4k_introdev_with_ibh) + For this, a very useful symbol is exported, : _dt_debug, the address of the + DT_DEBUG's d_ptr. + """ + + def __init__(self): + object.__init__(self) + + self.objs = [] + self.shlibs = [] + self.entry_point = "_start" + self.output = Elf64() + + def add_object(self, filename): + """Add a relocatable file as input.""" + obj = Elf64(filename) + obj.resolve_names() + obj.find_symbols() + self.objs.append(obj) + + def add_shlib(self, libname): + """Add a shared library to link against.""" + # Note : we use ctypes' find_library to find the real name + fullname = find_library(libname) + if not fullname: + raise LibNotFound(libname) + self.shlibs.append(fullname) + + def link(self): + """Do the actual linking.""" + # Prepare two segments. One for .text, the other for .data + .bss + self.text_segment = TextSegment() + # .data will be mapped 0x100000 bytes further + self.data_segment = DataSegment(align=0x100000) + self.output.add_segment(self.text_segment) + self.output.add_segment(self.data_segment) + + # Adjust the ELF header + self.output.header.e_ident.make_default_amd64() + self.output.header.e_phoff = self.output.header.size + self.output.header.e_type = ET_EXEC + # Elf header lies inside .text + self.text_segment.add_content(self.output.header) + + # Create the four Program Headers. They'll be inside .text + # The first Program Header defines .text + ph_text = Elf64_Phdr() + ph_text.p_type = PT_LOAD + ph_text.p_align = 0x100000 + self.output.add_phdr(ph_text) + self.text_segment.add_content(ph_text) + + # Second one defines .data + .bss + ph_data = Elf64_Phdr() + ph_data.p_type = PT_LOAD + ph_data.p_align = 0x100000 + self.output.add_phdr(ph_data) + self.text_segment.add_content(ph_data) + + # Third one is only there to define the DYNAMIC section + ph_dynamic = Elf64_Phdr() + ph_dynamic.p_type = PT_DYNAMIC + self.output.add_phdr(ph_dynamic) + self.text_segment.add_content(ph_dynamic) + + # Fourth one is for interp + ph_interp = Elf64_Phdr() + ph_interp.p_type = PT_INTERP + self.output.add_phdr(ph_interp) + self.text_segment.add_content(ph_interp) + + # We have all the needed program headers, update ELF header + self.output.header.ph_num = len(self.output.phdrs) + + # Create the actual content for the interpreter section + interp = Interpreter() + self.text_segment.add_content(interp) + + # Then the Dynamic section + dynamic = Dynamic() + # for all the requested libs, add a reference in the Dynamic table + for lib in self.shlibs: + dynamic.add_shlib(lib) + # Add an empty symtab, symbol resolution is not done. + dynamic.add_symtab(0) + # And we need a DT_DEBUG + dynamic.add_debug() + + # This belongs to .data + self.data_segment.add_content(dynamic) + # The dynamic table links to a string table for the libs' names. + self.text_segment.add_content(dynamic.strtab) + + # We can now add the interesting sections to the corresponding segments + for i in self.objs: + for sh in i.shdrs: + # Only ALLOC sections are worth it. + # This might require change in the future + if not (sh.sh_flags & SHF_ALLOC): + continue + + if (sh.sh_flags & SHF_EXECINSTR): + self.text_segment.add_content(sh.content) + else: # No exec, it's for .data or .bss + if (sh.sh_type == SHT_NOBITS): + self.data_segment.add_nobits(sh.content) + else: + self.data_segment.add_content(sh.content) + + # Now, everything is at its place. + # Knowing the base address, we can determine where everyone will fall + self.output.layout(base_vaddr=0x400000) + + # Knowing the addresses of all the parts, Program Headers can be filled + # This will put the correct p_offset, p_vaddr, p_filesz and p_memsz + ph_text.update_from_content(self.text_segment) + ph_data.update_from_content(self.data_segment) + ph_interp.update_from_content(interp) + ph_dynamic.update_from_content(dynamic) + + + # Gather the undefined symbols from all input files + undefined_symbols = set() + for i in self.objs: + undefined_symbols.update(i.undefined_symbols) + + # Make a dict with all the symbols declared globally. + # Key is the symbol name, value is the final virtual address + global_symbols = {} + + for i in self.objs: + for s in i.global_symbols: + if s in global_symbols: + raise RedefinedSymbol(s) + # Final address is the section's base address + the symbol's offset + addr = i.global_symbols[s][0].content.virt_addr + addr += i.global_symbols[s][1] + global_symbols[s] = addr + + # Add a few useful symbols + global_symbols["_dt_debug"] = dynamic.dt_debug_address + global_symbols["_DYNAMIC"] = dynamic.virt_addr + + # Find out which symbols aren't really defined anywhere + undefined_symbols.difference_update(global_symbols) + + # For now, it's an error. Later, we could try to find them in the shared + # libraries. + if len(undefined_symbols): + raise UndefinedSymbol(undefined_symbols.pop()) + + + + # We can now do the actual relocation + for i in self.objs: + i.apply_relocation(global_symbols) + + # And update the ELF header with the entry point + if not self.entry_point in global_symbols: + raise UndefinedSymbol(self.entry_point) + self.output.header.e_entry = global_symbols[self.entry_point] + + # DONE ! + + + def toBinArray(self): + return self.output.toBinArray() + + def tofile(self, file_object): + return self.output.toBinArray().tofile(file_object) + diff --git a/bold.py b/bold.py index 74a8ba2..02f6626 100755 --- a/bold.py +++ b/bold.py @@ -5,195 +5,117 @@ # Copyright (C) 2009 Amand 'alrj' Tihon # # This file is part of bold, the Byte Optimized Linker. -# Heavily inspired by elf.h from the GNU C Library. # # You can redistribute this file and/or modify it under the terms of the # GNU Lesser General Public License as published by the Free Software # Foundation, version 2.1. -from elf.BinArray import BinArray -from elf.constants import * -from elf.elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter -import struct, sys - -infiles = [Elf64(n) for n in sys.argv[1:]] -for i in infiles: - i.resolve_names() - i.find_symbols() - -#h = infile.header -#print "Class: %s" % h.e_ident.ei_class -#print "Data: %s" % h.e_ident.ei_data -#print "Version: %s" % h.e_ident.ei_version -#print "OS/ABI: %s" % h.e_ident.ei_osabi -#print "ABI Version: %s" % h.e_ident.ei_abiversion -#print "Type: %s" % h.e_type -#print "Machine: %s" % h.e_machine -#print "Version: %s" % h.e_version -#print "Entry point address: 0x%x" % h.e_entry -#print "Start of program headers: %i (bytes into file)" % h.e_phoff -#print "Start of section headers: %i (bytes into file)" % h.e_shoff -#print "Flags: 0x%x" % h.e_flags -#print "Size of this header: %i (bytes)" % h.e_ehsize -#print "Size of program headers: %i (bytes)" % h.e_phentsize -#print "Number of program headers: %i" % h.e_phnum -#print "Size of section headers: %i (bytes)" % h.e_shentsize -#print "Number of section headers: %i" % h.e_shnum - -#print "Section header string table index: %s" % h.e_shstrndx - -#print - -#print "Section Headers:" -#for sh in infile.shdrs: - #print "[%2i] %-16s %-16s %016x %08x" % (sh.index, sh.name, sh.sh_type, - #sh.sh_addr, sh.sh_offset) - #print " %016x %016x %-5s %4i %4i %4i" % (sh.sh_size, sh.sh_entsize, - #sh.sh_flags, sh.sh_link, sh.sh_info, sh.sh_addralign) -#print - -#for sh in infile.shdrs : - #if sh.sh_type == SHT_STRTAB: - ##print "Section %i is a string table with entries :" % sh.index - ##for i, name in sh.content.iteritems(): - ## print "%4i %s" % (i, name) - #print - #elif sh.sh_type == SHT_SYMTAB: - #print "Section %i is a symbol table with entries :" % sh.index - #print " Num: Value Size Type Bind Vis Ndx Name" - #for i, sym in enumerate(sh.content.symtab): - #print "%6i: %016x %5s %-7s %-6s %-7s %4s %s" % (i, - #sym.st_value, sym.st_size, sym.st_type, sym.st_binding, - #sym.st_visibility, sym.st_shndx, sym.name) - #print - #elif sh.sh_type == SHT_RELA: - #print "Section %s is a RELA that applies to %s:" % (sh.name, sh.target.name) - #print " Offset Info Type Sym. Value Sym. Name + Addend" - #for i in sh.content.relatab: - #print "%012x %012x %-16s %016x %s%s + %x" % (i.r_offset, i.r_info, - #i.r_type, i.symbol.st_value, i.symbol.name, - #sh.owner.shdrs[i.symbol.st_shndx].name, - #i.r_addend) - #print - - - -outfile = Elf64() - -text_segment = TextSegment() -data_segment = DataSegment(align=0x100000) - -outfile.add_segment(text_segment) -outfile.add_segment(data_segment) - - -outfile.header.e_ident.make_default_amd64() -outfile.header.e_phoff = outfile.header.size -outfile.header.e_type = ET_EXEC -text_segment.add_content(outfile.header) - -ph_text = Elf64_Phdr() -ph_text.p_type = PT_LOAD -ph_text.p_align = 0x100000 -outfile.add_phdr(ph_text) -text_segment.add_content(ph_text) - -ph_data = Elf64_Phdr() -ph_data.p_type = PT_LOAD -ph_data.p_align = 0x100000 -outfile.add_phdr(ph_data) -text_segment.add_content(ph_data) - -ph_dynamic = Elf64_Phdr() -ph_dynamic.p_type = PT_DYNAMIC -outfile.add_phdr(ph_dynamic) -text_segment.add_content(ph_dynamic) - -ph_interp = Elf64_Phdr() -ph_interp.p_type = PT_INTERP -outfile.add_phdr(ph_interp) -text_segment.add_content(ph_interp) - -interp = Interpreter() -text_segment.add_content(interp) - -dynamic = Dynamic() -dynamic.add_shlib("libGL.so.1") -dynamic.add_shlib("libSDL-1.2.so.0") -dynamic.add_symtab(0) -dynamic.add_debug() -data_segment.add_content(dynamic) -text_segment.add_content(dynamic.strtab) - - -# Find interresting sections in input file -for i in infiles: - for sh in i.shdrs: - if (sh.sh_flags & SHF_ALLOC): - if (sh.sh_flags & SHF_EXECINSTR): - text_segment.add_content(sh.content) - else: # No exec, it's for .data - if (sh.sh_type == SHT_NOBITS): - data_segment.add_nobits(sh.content) - else: - data_segment.add_content(sh.content) - - -outfile.layout(base_vaddr=0x400000) - - -# Set addresses, sizes, etc. where known -outfile.header.e_phnum = len(outfile.phdrs) -outfile.header.e_phoff = outfile.phdrs[0].file_offset - -ph_text.p_offset = text_segment.file_offset -ph_text.p_vaddr = text_segment.virt_addr -ph_text.p_filesz = text_segment.physical_size -ph_text.p_memsz = text_segment.logical_size - -ph_data.p_offset = data_segment.file_offset -ph_data.p_vaddr = data_segment.virt_addr -ph_data.p_filesz = data_segment.physical_size -ph_data.p_memsz = data_segment.logical_size - -ph_interp.p_offset = interp.file_offset -ph_interp.p_vaddr = interp.virt_addr -ph_interp.p_filesz = interp.physical_size -ph_interp.p_memsz = interp.logical_size - -ph_dynamic.p_offset = dynamic.file_offset -ph_dynamic.p_vaddr = dynamic.virt_addr -ph_dynamic.p_filesz = dynamic.physical_size -ph_dynamic.p_memsz = dynamic.logical_size - -for i in infiles: - outfile.undefined_symbols.extend(i.undefined_symbols) - -dt_dbg = dynamic.dt_debug_address -outfile.global_symbols["_dt_debug"] = dt_dbg -outfile.global_symbols["_DYNAMIC"] = dynamic.virt_addr - -# Take all globally declared symbols, and put them in outfile's dict -for i in infiles: - for s in i.global_symbols: - section_addr = i.global_symbols[s][0].content.virt_addr - addr = section_addr + i.global_symbols[s][1] - if s in outfile.global_symbols: - print "Symbol '%s' defined more than once." - exit(1) - outfile.global_symbols[s] = addr - -for i in infiles: - i.apply_relocation(outfile.global_symbols) - -_start = outfile.global_symbols["_start"] -outfile.header.e_entry = _start - -# outfile.apply_global_relocation() - -f = open("prout", "wb") -outfile.toBinArray().tofile(f) -f.close() - - +#from bold.constants import * +#from bold.elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter + +__author__ = "Amand Tihon " +__version__ = "0.0.1" + + +from Bold.linker import BoldLinker +from Bold.errors import * +from optparse import OptionParser +import os, sys + +class BoldOptionParser(OptionParser): + """Bold option parser.""" + global __version__ + _usage_message = "%prog [options] file..." + _version_message = "%%prog version %s" % __version__ + _description_message = """A limited ELF linker for x86_64. It is +intended to create very small executables with the least possible overhead.""" + + def __init__(self): + OptionParser.__init__(self, usage=self._usage_message, + version=self._version_message, description=self._description_message, + add_help_option=True, prog="bold") + + self.set_defaults(entry="_start", outfile="a.out") + + self.add_option("-e", "--entry", action="store", dest="entry", + metavar="SYMBOL", help="Set the entry point (default: _start)") + self.add_option("-l", "--library", action="append", dest="shlibs", + metavar="LIBNAME", help="Search for library LIBNAME") + self.add_option("-o", "--output", action="store", dest="outfile", + metavar="FILE", help="Set output file name (default: a.out)") + + +def main(): + parser = BoldOptionParser() + options, args = parser.parse_args() + + linker = BoldLinker() + + if options.shlibs: + for shlib in options.shlibs: + try: + linker.add_shlib(shlib) + except LibNotFound, e: + print >>sys.stderr, e + return 1 + + if not args: + print >>sys.stderr, "No input files" + return 1 + + for infile in args: + try: + linker.add_object(infile) + except UnsupportedObject, e: + print >>sys.stderr, e + return 1 + except IOError, e: + print >>sys.stderr, e + return 1 + + linker.entry_point = options.entry + + try: + linker.link() + except UndefinedSymbol, e: + print >>sys.stderr, e + return 1 + except RedefinedSymbol, e: + print >>sys.stderr, e + return 1 + + # Remove the file if it was present + try: + os.unlink(options.outfile) + except os.error, e: + if e.errno == 2: # No such file + pass + + try: + o = open(options.outfile, "wb") + except IOError, e: + print >>sys.stderr, e + return 1 + + linker.tofile(o) + o.close() + + try: + os.chmod(options.outfile, 0755) + except IOError, e: + print >>sys.stderr, e + return 1 + + return 0 + + +if __name__ == "__main__": + try: + rcode = main() + except Exception, e: + raise + print >>sys.stderr, "Unhandled error:", e + rcode = 1 + + exit(rcode) diff --git a/elf/BinArray.py b/elf/BinArray.py deleted file mode 100644 index ea88cad..0000000 --- a/elf/BinArray.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- - -# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; - -# Copyright (C) 2009 Amand 'alrj' Tihon -# -# This file is part of bold, the Byte Optimized Linker. -# Heavily inspired by elf.h from the GNU C Library. -# -# You can redistribute this file and/or modify it under the terms of the -# GNU Lesser General Public License as published by the Free Software -# Foundation, version 2.1. - -from array import array -import struct - -class BinArray(array): - """A specialized array that contains bytes""" - def __new__(cls, data=None): - if data: - return array.__new__(BinArray, "B", data) - else: - return array.__new__(BinArray, "B") diff --git a/elf/__init__.py b/elf/__init__.py deleted file mode 100644 index c25fb45..0000000 --- a/elf/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- - -# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; - -# Copyright (C) 2009 Amand 'alrj' Tihon -# -# This file is part of bold, the Byte Optimized Linker. -# Heavily inspired by elf.h from the GNU C Library. -# -# You can redistribute this file and/or modify it under the terms of the -# GNU Lesser General Public License as published by the Free Software -# Foundation, version 2.1. - diff --git a/elf/constants.py b/elf/constants.py deleted file mode 100644 index 72d0100..0000000 --- a/elf/constants.py +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding: utf-8 -*- - -# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; - -# Copyright (C) 2009 Amand 'alrj' Tihon -# -# This file is part of bold, the Byte Optimized Linker. -# Heavily inspired by elf.h from the GNU C Library. -# -# You can redistribute this file and/or modify it under the terms of the -# GNU Lesser General Public License as published by the Free Software -# Foundation, version 2.1. - -"""This file defines standard ELF constants.""" - -class SymbolicConstant(long): - """Allows you to map a symbolic name with a given integer.""" - _symbolics = {} - _default = None - def __new__(cls, value, symbolic=None): - if symbolic: - cls._symbolics[value] = symbolic - return long.__new__(cls, value) - - def __str__(self): - if long(self) in self._symbolics: - return self._symbolics[long(self)] - elif self._default: - return self._default % long(self) - else: - return str(long(self)) - - -class ElfClass(SymbolicConstant): - _symbolics = {} -ELFCLASSNONE = ElfClass(0, "Invalid ELF class") -ELFCLASS32 = ElfClass(1, "ELF32") -ELFCLASS64 = ElfClass(2, "ELF64") - - -class ElfData(SymbolicConstant): - _symbolics = {} -ELFDATANONE = ElfData(0, "Invalid data encoding") -ELFDATA2LSB = ElfData(1, "Little endian") -ELFDATA2MSB = ElfData(2, "Big endian") - - -class ElfVersion(SymbolicConstant): - _symbolics = {} -EV_NONE = ElfVersion(0, "Invalid ELF version") -EV_CURRENT = ElfVersion(1, "Current version (1)") - - -class ElfOsAbi(SymbolicConstant): - _symbolics = {} -# Fill me -ELFOSABI_NONE = ElfOsAbi(0, "UNIX - System V") -ELFOSABI_SYSV = ElfOsAbi(0, "UNIX - System V") - - -class ElfType(SymbolicConstant): - _symbolics = {} -ET_NONE = ElfType(0, "No file type") -ET_REL = ElfType(1, "Relocatable file") -ET_EXEC = ElfType(2, "Executable file") -ET_DYN = ElfType(3, "Shared object file") -ET_CORE = ElfType(4, "Core file") - - -class ElfMachine(SymbolicConstant): - _symbolics = {} -# Fill me -EM_NONE = ElfMachine(0, "No machine") -EM_386 = ElfMachine(3, "Intel 80386") -EM_X86_64 = ElfMachine(62, "AMD x86-64 architecture") - -class ElfSectionIndex(SymbolicConstant): - _symbolics = {} -SHN_UNDEF = ElfSectionIndex(0, "UND") -SHN_ABS = ElfSectionIndex(0xfff1, "ABS") -SHN_COMMON = ElfSectionIndex(0xfff2, "COM") - -class ElfShType(SymbolicConstant): - _symbolics = {} -SHT_NULL = ElfShType(0, "NULL") -SHT_PROGBITS = ElfShType(1, "PROGBITS") -SHT_SYMTAB = ElfShType(2, "SYMTAB") -SHT_STRTAB = ElfShType(3, "STRTAB") -SHT_RELA = ElfShType(4, "RELA") -SHT_HASH = ElfShType(5, "HASH") -SHT_DYNAMIC = ElfShType(6, "DYNAMIC") -SHT_NOTE = ElfShType(7, "NOTE") -SHT_NOBITS = ElfShType(8, "NOBITS") -SHT_REL = ElfShType(9, "REL") -SHT_SHLIB = ElfShType(10, "SHLIB") -SHT_DYNSYM = ElfShType(11, "DYNSYM") - -SHF_WRITE = 0x1 -SHF_ALLOC = 1 << 1 -SHF_EXECINSTR = 1 << 2 -SHF_MERGE = 1 << 4 -SHF_STRINGS = 1 << 5 -SHF_INFO_LINK = 1 << 6 -SHF_LINK_ORDER = 1 << 7 -SHF_OS_NONCONFORMING = 1 << 8 -SHF_GROUP = 1 << 9 -SHF_TLS = 1 << 10 -SHF_MASKOS = 0x0f00000 -SHF_MASKPROC = 0xf000000 - -STN_UNDEF = 0 - - -class ElfSymbolBinding(SymbolicConstant): - _symbolics = {} -STB_LOCAL = ElfSymbolBinding(0, "LOCAL") -STB_GLOBAL = ElfSymbolBinding(1, "GLOBAL") -STB_WEAK = ElfSymbolBinding(2, "WEAK") - - -class ElfSymbolType(SymbolicConstant): - _symbolics = {} -STT_NOTYPE = ElfSymbolType(0, "NOTYPE") -STT_OBJECT = ElfSymbolType(1, "OBJECT") -STT_FUNC = ElfSymbolType(2, "FUNC") -STT_SECTION = ElfSymbolType(3, "SECTION") -STT_FILE = ElfSymbolType(4, "FILE") -STT_COMMON = ElfSymbolType(5, "COMMON") -STT_TLS = ElfSymbolType(6, "TLS") - - -class ElfSymbolVisibility(SymbolicConstant): - _symbolics = {} -STV_DEFAULT = ElfSymbolVisibility(0, "DEFAULT") -STV_INTERNAL = ElfSymbolVisibility(1, "INTERN") -STV_HIDDEN = ElfSymbolVisibility(2, "HIDDEN") -STV_PROTECTED = ElfSymbolVisibility(3, "PROTECTED") - - -class ElfPhType(SymbolicConstant): - _symbolics = {} -PT_NULL = ElfPhType(0, "NULL") -PT_LOAD = ElfPhType(1, "LOAD") -PT_DYNAMIC = ElfPhType(2, "DYNAMIC") -PT_INTERP = ElfPhType(3, "INTERP") -PT_NOTE = ElfPhType(4, "NOTE") -PT_SHLIB = ElfPhType(5, "SHLIB") -PT_PHDR = ElfPhType(6, "PHDR") -PT_TLS = ElfPhType(7, "TLS") - -PF_X = (1 << 0) -PF_W = (1 << 1) -PF_R = (1 << 2) - -class ElfDynamicType(SymbolicConstant): - _symbolics = {} - _default = "Unknown (0x%x)" -DT_NULL = ElfDynamicType(0, "NULL") -DT_NEEDED = ElfDynamicType(1, "NEEDED") -DT_PLTRELSZ = ElfDynamicType(2, "PLTRELSZ") -DT_PLTGOT = ElfDynamicType(3, "PLTGOT") -DT_HASH = ElfDynamicType(4, "HASH") -DT_STRTAB = ElfDynamicType(5, "STRTAB") -DT_SYMTAB = ElfDynamicType(6, "SYMTAB") -DT_RELA = ElfDynamicType(7, "RELA") -DT_RELASZ = ElfDynamicType(8, "RELASZ") -DT_RELAENT = ElfDynamicType(9, "RELAENT") -DT_STRSZ = ElfDynamicType(10, "STRSZ") -DT_SYMENT = ElfDynamicType(11, "SYMENT") -DT_INIT = ElfDynamicType(12, "INIT") -DT_FINI = ElfDynamicType(13, "FINI") -DT_SONAME = ElfDynamicType(14, "SONAME") -DT_RPATH = ElfDynamicType(15, "RPATH") -DT_SYMBOLIC = ElfDynamicType(16, "SYMBOLIC") -DT_REL = ElfDynamicType(17, "REL") -DT_RELSZ = ElfDynamicType(18, "RELSZ") -DT_RELENT = ElfDynamicType(19, "RELENT") -DT_PLTREL = ElfDynamicType(20, "PLTREL") -DT_DEBUG = ElfDynamicType(21, "DEBUG") -DT_TEXTREL = ElfDynamicType(22, "TEXTREL") -DT_JMPREL = ElfDynamicType(23, "JMPREL") -DT_BIND_NOW = ElfDynamicType(24, "BIND_NOW") -DT_INIT_ARRAY = ElfDynamicType(25, "INIT_ARRAY") -DT_FINI_ARRAY = ElfDynamicType(26, "FINI_ARRAY") -DT_INIT_ARRAYSZ = ElfDynamicType(27, "INIT_ARRAYSZ") -DT_FINI_ARRAYSZ = ElfDynamicType(28, "FINI_ARRAYSZ") -DT_RUNPATH = ElfDynamicType(29, "RUNPATH") -DT_FLAGS = ElfDynamicType(30, "FLAGS") -DT_ENCODING = ElfDynamicType(31, "ENCODING") -DT_PREINIT_ARRAY = ElfDynamicType(32, "PREINIT_ARRAY") -DT_PREINIT_ARRAYSZ = ElfDynamicType(33, "PREINIT_ARRAYSZ") - -# AMD x86-64 relocations -class Amd64Relocation(SymbolicConstant): - _symbolics = {} - -R_X86_64_NONE = Amd64Relocation(0, "NONE") -R_X86_64_64 = Amd64Relocation(1, "64") -R_X86_64_PC32 = Amd64Relocation(2, "PC32") -R_X86_64_GOT32 = Amd64Relocation(3, "GOT32") -R_X86_64_PLT32 = Amd64Relocation(4, "PLT32") -R_X86_64_COPY = Amd64Relocation(5, "COPY") -R_X86_64_GLOB_DAT = Amd64Relocation(6, "GLOB_DAT") -R_X86_64_JUMP_SLOT = Amd64Relocation(7, "JUMP_SLOT") -R_X86_64_RELATIVE = Amd64Relocation(8, "RELATIVE") -R_X86_64_GOTPCREL = Amd64Relocation(9, "GOTPCREL") -R_X86_64_32 = Amd64Relocation(10, "32") -R_X86_64_32S = Amd64Relocation(11, "32S") -R_X86_64_16 = Amd64Relocation(12, "16") -R_X86_64_PC16 = Amd64Relocation(13, "PC16") -R_X86_64_8 = Amd64Relocation(14, "8") -R_X86_64_PC8 = Amd64Relocation(15, "PC8") -R_X86_64_DTPMOD64 = Amd64Relocation(16, "DTPMOD64") -R_X86_64_DTPOFF64 = Amd64Relocation(17, "DTPOFF64") -R_X86_64_TPOFF64 = Amd64Relocation(18, "TPOFF64") -R_X86_64_TLSGD = Amd64Relocation(19, "TLSGD") -R_X86_64_TLSLD = Amd64Relocation(20, "TLSLD") -R_X86_64_DTPOFF32 = Amd64Relocation(21, "DTPOFF32") -R_X86_64_GOTTPOFF = Amd64Relocation(22, "GOTTPOFF") -R_X86_64_TPOFF32 = Amd64Relocation(23, "TPOFF32") - - - - - - - - - - - - - - - - - - - - - diff --git a/elf/elf.py b/elf/elf.py deleted file mode 100644 index d90e81e..0000000 --- a/elf/elf.py +++ /dev/null @@ -1,845 +0,0 @@ -# -*- coding: utf-8 -*- - -# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; - -# Copyright (C) 2009 Amand 'alrj' Tihon -# -# This file is part of bold, the Byte Optimized Linker. -# Heavily inspired by elf.h from the GNU C Library. -# -# You can redistribute this file and/or modify it under the terms of the -# GNU Lesser General Public License as published by the Free Software -# Foundation, version 2.1. - - -from BinArray import BinArray -from constants import * -import struct - -# Helpful decorator -def nested_property(c): - return property(**c()) - -#-------------------------------------------------------------------------- -# Elf -#-------------------------------------------------------------------------- - -class Elf64(object): - """Handles an Elf64 object.""" - interpreter = "/lib64/ld-linux-x86-64.so.2" - - def __init__(self, path=None): - object.__init__(self) - self.header = Elf64_Ehdr() - self.header.owner = self - self.shdrs = [] - self.phdrs = [] - self.shlibs = [] - self.sections = {} - self.segments = [] - self.local_symbols = {} - self.global_symbols = {} - self.undefined_symbols = [] - - if path: - self.fromfile(path) - - def fromfile(self, path): - f = file(path, "rb") - - # Load Elf header - data = BinArray() - data.fromfile(f, Elf64_Ehdr.size) - self.header.fromBinArray(data) - - # Load sections headers - f.seek(self.header.e_shoff) - for i in range(self.header.e_shnum): - data = BinArray() - data.fromfile(f, self.header.e_shentsize) - h = Elf64_Shdr(i, data) - h.owner = self - self.shdrs.append(h) - - # Read sections content - for sh in self.shdrs: - data = BinArray() - if sh.sh_type != SHT_NOBITS: - f.seek(sh.sh_offset) - data.fromfile(f, sh.sh_size) - sh.content = data - - f.close() - - def resolve_names(self): - # The .shstrtab index is in Elf Header. find the sections names - strtab = self.shdrs[self.header.e_shstrndx].content - - for sh in self.shdrs: - sh.name = strtab[int(sh.sh_name)] - self.sections[sh.name] = sh - - # And resolve names in the section itself - sh.resolve_names() - - - def find_symbols(self): - for sh in self.shdrs: - if sh.sh_type == SHT_SYMTAB: - symtab = sh.content.symtab - - for symbol in symtab: - if symbol.st_type == STT_FILE: - continue - if symbol.st_shndx == SHN_ABS: - continue - if symbol.st_shndx == SHN_UNDEF: - if symbol.name: - self.undefined_symbols.append(symbol.name) - continue - - target_section = self.shdrs[symbol.st_shndx] - - symbol_name = symbol.name - value = symbol.st_value - bind = symbol.st_binding - - # We got a name, a target section, and an offset in the section - if symbol.st_binding == STB_LOCAL: - if symbol.st_type == STT_SECTION: - symbol_name = target_section.name - self.local_symbols[symbol_name] = (target_section, value) - else: - self.global_symbols[symbol_name] = (target_section, value) - - def apply_relocation(self, all_global_symbols): - # find relocation tables - relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]] - for sh in relocations: - target = sh.target.content - - for reloc in sh.content.relatab: - - if reloc.symbol.st_shndx == SHN_UNDEF: - # This is an extern symbol, find it in all_global_symbols - sym_address = all_global_symbols[reloc.symbol.name] - print "0x%x" % sym_address - else: - # source == in which section it is defined - source = self.shdrs[reloc.symbol.st_shndx].content - sym_address = source.virt_addr + reloc.symbol.st_value - - target_ba = target.data # The actual BinArray that we'll modify - pc_address = target.virt_addr + reloc.r_offset - - if reloc.r_type == R_X86_64_64: - format = "> 4) & 0x0f) - def fset(self, value): - self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f)) - return locals() - - @nested_property - def st_type(): - def fget(self): - return ElfSymbolType(self.st_info & 0x0f) - def fset(self, value): - self.st_info = ((self.st_info & 0xf0) | (value & 0x0f)) - return locals() - - @nested_property - def st_visibility(): - def fget(self): - return ElfSymbolVisibility(self.st_other & 0x03) - def fset(self, value): - self.st_other = ((self.st_other & 0xfc) | (value & 0x03)) - return locals() - - def fromBinArray(self, rawdata): - t = struct.unpack(self.format, rawdata) - self.st_name = t[0] # index in the strtab pointed by sh_link - self.st_info = t[1] - self.st_other = t[2] - self.st_shndx = ElfSectionIndex(t[3]) - self.st_value = t[4] - self.st_size = t[5] - - -class Elf64_Rel(object): - format = "<2Q" - def __init__(self, rawdata=None): - object.__init__(self) - self.r_addend = 0 # No addend in a Rel. - if rawdata: - self.fromBinArray(rawdata) - - def fromBinArray(sef, rawdata): - t = struct.unpack(self.format, rawdata) - self.r_offset = t[0] - self.r_info = t[1] - - @nested_property - def r_sym(): - def fget(self): - return (self.r_info >> 32) & 0xffffffff - def fset(self, value): - self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff) - return locals() - - @nested_property - def r_type(): - def fget(self): - return Amd64Relocation(self.r_info & 0xffffffff) - def fset(self, value): - self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff) - return locals() - - -class Elf64_Rela(Elf64_Rel): - format = "<2Q q" - def __init__(self, rawdata=None): - Elf64_Rel.__init__(self, rawdata) - - def fromBinArray(self, rawdata): - t = struct.unpack(self.format, rawdata) - self.r_offset = t[0] - self.r_info = t[1] - self.r_addend = t[2] - - -class Elf64_Dyn(object): - format = "<2Q" - size = struct.calcsize(format) - def __init__(self, tag, value): - object.__init__(self) - self.d_tag = tag - self.d_val = value - - @nested_property - def d_ptr(): - def fget(self): - return self.d_val - def fset(self, value): - self.d_val = value - return locals() - - -# Sections types : - -def Section(shdr, data=None): - """A section factory""" - dataclass = { - SHT_NULL: SNull, - SHT_PROGBITS: SProgBits, - SHT_SYMTAB: SSymtab, - SHT_STRTAB: SStrtab, - SHT_RELA: SRela, - SHT_HASH: SHash, - SHT_DYNAMIC: SDynamic, - SHT_NOTE: SNote, - SHT_NOBITS: SNobits, - SHT_REL: SRel, - SHT_SHLIB: SShlib, - SHT_DYNSYM: SDynsym - } - if shdr.sh_type in dataclass: - return dataclass[shdr.sh_type](shdr, data) - else: - return BaseSection(shdr, data) - - -class BaseSection(object): - def __init__(self, shdr, data=None): - object.__init__(self) - self.data = None - self.header = shdr - if data: - self.fromBinArray(data) - - def fromBinArray(self, data): - self.data = data - - def toBinArray(self): - if self.data: - return self.data - else: - return BinArray() - - def resolve_names(self, elf): - """Nothing to resolve.""" - pass - - @nested_property - def size(): - def fget(self): - return len(self.data) - return locals() - physical_size = size - logical_size = size - - def layout(self): - pass - - -class SNull(BaseSection): - def __init__(self, shdr, data=None): - BaseSection.__init__(self, shdr, None) - - -class SProgBits(BaseSection): - def __init__(self, shdr, data=None): - BaseSection.__init__(self, shdr, data) - - -class SSymtab(BaseSection): - entsize = struct.calcsize(Elf64_Sym.format) - def __init__(self, shdr, data=None): - self.symtab = [] - BaseSection.__init__(self, shdr, data) - - def fromBinArray(self, data): - BaseSection.fromBinArray(self, data) - nument = len(data) / self.entsize - for i in range(nument): - start = i * self.entsize - end = i * self.entsize + self.entsize - self.symtab.append(Elf64_Sym(data[start:end])) - - def resolve_names(self, elf): - # For a symtab, the strtab is indicated by sh_link - strtab = elf.shdrs[self.header.sh_link].content - # Resolve for all symbols in the table - for sym in self.symtab: - sym.name = strtab[sym.st_name] - - def __getitem__(self, key): - return self.symtab[key] - - -class SStrtab(BaseSection): - def __init__(self, shdr, data=None): - self.strtab = {} - BaseSection.__init__(self, shdr, data) - - def fromBinArray(self, data): - BaseSection.fromBinArray(self, data) - itab = data.tostring().split('\0') - i = 0 - for sname in itab: - self.strtab[i] = sname - i += len(sname) + 1 - - def __getitem__(self, key): - if key in self.strtab: - return self.strtab[key] - else: - v = self.data[key:].tostring().split('\0')[0] - self.strtab[key] = v - return v - - def iteritems(self): - return self.strtab.iteritems() - - -class SRela(BaseSection): - entsize = struct.calcsize(Elf64_Rela.format) - def __init__(self, shdr, data=None): - self.relatab = [] - BaseSection.__init__(self, shdr, data) - - def fromBinArray(self, data): - BaseSection.fromBinArray(self, data) - nument = len(data) / self.entsize - for i in range(nument): - start = i * self.entsize - end = i * self.entsize + self.entsize - self.relatab.append(Elf64_Rela(data[start:end])) - - def resolve_names(self, elf): - """Badly named, this wil resolve to a symtab entry...""" - # sh_link leads to the symtab - self.symtab = elf.shdrs[self.header.sh_link].content - # sh_info links to the section on which the relocation applies - self.header.target = elf.shdrs[self.header.sh_info] - for r in self.relatab: - r.symbol = self.symtab[r.r_sym] - - - -class SHash(BaseSection): - pass - - -class SDynamic(BaseSection): - pass - - -class SNote(BaseSection): - pass - - -class SNobits(BaseSection): - size = 0 - physical_size = 0 - - @nested_property - def logical_size(): - def fget(self): - return self.header.sh_size - return locals() - - def toBinArray(self): - return BinArray() - -class SRel(BaseSection): - pass - - -class SShlib(BaseSection): - pass - - -class SDynsym(SSymtab): - pass - - -class Elf64_Phdr(object): - format = "<2I 6Q" - size = struct.calcsize(format) - physical_size = size - logical_size = size - - def __init__(self): - object.__init__(self) - self.p_type = PT_NULL - self.p_flags = PF_X + PF_W + PF_R - self.p_offset = 0 - self.p_vaddr = 0 - self.p_paddr = 0 - self.p_filesz = 0 - self.p_memsz = 0 - self.p_align = 1 - #self.content = [] - #self.nobits = [] - - def toBinArray(self): - res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset, - self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align) - return BinArray(res) - - def layout(self): - pass - - #def add_content(self, content): - # self.content.append(content) - - #def add_empty_content(self, content): - # self.nobits.append(content) - - #@nested_property - #def content_size(): - # def fget(self): - # return sum(s.sh_size for s in self.content) - # return locals() - - -class BaseSegment(object): - def __init__(self, align=0): - object.__init__(self) - self.align = align - self.content = [] - - def add_content(self, content): - self.content.append(content) - - def toBinArray(self): - ba = BinArray() - for c in self.content: - ba.extend(c.toBinArray()) - return ba - - @nested_property - def size(): - def fget(self): - return sum(c.size for c in self.content) - return locals() - physical_size = size - logical_size = size - - -class TextSegment(BaseSegment): - def __init__(self, align=0): - BaseSegment.__init__(self, align) - - def layout(self): - virt_addr = self.virt_addr - file_offset = self.file_offset - for i in self.content: - i.virt_addr = virt_addr - i.file_offset = file_offset - i.layout() - virt_addr += i.logical_size - file_offset += i.physical_size - - -class DataSegment(BaseSegment): - def __init__(self, align=0): - BaseSegment.__init__(self, align) - self.nobits = [] - - def add_nobits(self, content): - self.nobits.append(content) - - def layout(self): - virt_addr = self.virt_addr - file_offset = self.file_offset - for i in self.content: - i.virt_addr = virt_addr - i.file_offset = file_offset - i.layout() - virt_addr += i.logical_size - file_offset += i.physical_size - for i in self.nobits: - i.virt_addr = virt_addr - i.file_offset = 0 - i.layout() - virt_addr += i.logical_size - - @nested_property - def logical_size(): - def fget(self): - return self.physical_size + sum(c.logical_size for c in self.nobits) - return locals() - - - -class PStrtab(object): - def __init__(self): - object.__init__(self) - self.table = [] - self.virt_addr = None - - def append(self, string): - if len(self.table): - offset = self.table[-1][0] - offset += len(self.table[-1][1]) - else: - offset = 0 - new_str = string + '\0' - self.table.append((offset, new_str)) - return offset - - @nested_property - def size(): - def fget(self): - return (self.table[-1][0] + len(self.table[-1][1])) - return locals() - physical_size = size - logical_size = size - - def toBinArray(self): - ba = BinArray() - for s in (i[1] for i in self.table): - ba.fromstring(s) - return ba - - def layout(self): - pass - - -class Dynamic(object): - def __init__(self): - object.__init__(self) - self.dyntab = [] - self.strtab = PStrtab() - - @nested_property - def size(): - def fget(self): - # End the table with a DT_NULL without associated value. - return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q")) - return locals() - physical_size = size - logical_size = size - - def add_shlib(self, shlib): - offset = self.strtab.append(shlib) - self.dyntab.append((DT_NEEDED, offset)) - - def add_symtab(self, vaddr): - self.dyntab.append((DT_SYMTAB, vaddr)) - - def add_debug(self): - self.dyntab.append((DT_DEBUG, 0)) - - def layout(self): - # Adjust the address of the strtab, if - if self.strtab.virt_addr is None: - print "Ooops, strtab's address is not known yet. Aborting." - exit(1) - else: - self.dyntab.append((DT_STRTAB, self.strtab.virt_addr)) - - @nested_property - def dt_debug_address(): - def fget(self): - for i, d in enumerate(self.dyntab): - if d[0] == DT_DEBUG: - return self.virt_addr + (i*16 + 8) - return locals() - - - def toBinArray(self): - ba = BinArray() - for i in self.dyntab: - s = struct.pack("<2Q", i[0], i[1]) - ba.fromstring(s) - null = struct.pack("