From aa036795cb0ab7f31b9d78cfa562ccb603bc977a Mon Sep 17 00:00:00 2001 From: Amand Tihon Date: Thu, 21 May 2009 15:31:18 +0200 Subject: [PATCH 1/1] First working version. --- bold.py | 199 +++++++++++ elf/BinArray.py | 23 ++ elf/__init__.py | 13 + elf/constants.py | 241 ++++++++++++++ elf/elf.py | 845 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1321 insertions(+) create mode 100755 bold.py create mode 100644 elf/BinArray.py create mode 100644 elf/__init__.py create mode 100644 elf/constants.py create mode 100644 elf/elf.py diff --git a/bold.py b/bold.py new file mode 100755 index 0000000..74a8ba2 --- /dev/null +++ b/bold.py @@ -0,0 +1,199 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; + +# Copyright (C) 2009 Amand 'alrj' Tihon +# +# This file is part of bold, the Byte Optimized Linker. +# Heavily inspired by elf.h from the GNU C Library. +# +# You can redistribute this file and/or modify it under the terms of the +# GNU Lesser General Public License as published by the Free Software +# Foundation, version 2.1. 
from elf.BinArray import BinArray
from elf.constants import *
from elf.elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter
import struct, sys

# Driver script: link the ELF64 relocatable objects named on the command
# line into a single executable written to the file "prout".

# Load every input object, then resolve its section/symbol names and sort
# its symbols into local / global / undefined.
infiles = [Elf64(n) for n in sys.argv[1:]]
for i in infiles:
  i.resolve_names()
  i.find_symbols()


outfile = Elf64()

text_segment = TextSegment()
data_segment = DataSegment(align=0x100000)

outfile.add_segment(text_segment)
outfile.add_segment(data_segment)


# The ELF header is the first thing placed in the text segment.
outfile.header.e_ident.make_default_amd64()
outfile.header.e_phoff = outfile.header.size
outfile.header.e_type = ET_EXEC
text_segment.add_content(outfile.header)

# Program headers. Their offsets, addresses and sizes are only filled in
# once outfile.layout() has run (see below).
ph_text = Elf64_Phdr()
ph_text.p_type = PT_LOAD
ph_text.p_align = 0x100000
outfile.add_phdr(ph_text)
text_segment.add_content(ph_text)

ph_data = Elf64_Phdr()
ph_data.p_type = PT_LOAD
ph_data.p_align = 0x100000
outfile.add_phdr(ph_data)
text_segment.add_content(ph_data)

ph_dynamic = Elf64_Phdr()
ph_dynamic.p_type = PT_DYNAMIC
outfile.add_phdr(ph_dynamic)
text_segment.add_content(ph_dynamic)

ph_interp = Elf64_Phdr()
ph_interp.p_type = PT_INTERP
outfile.add_phdr(ph_interp)
text_segment.add_content(ph_interp)

interp = Interpreter()
text_segment.add_content(interp)

# NOTE(review): the needed shared libraries are hard-coded here.
dynamic = Dynamic()
dynamic.add_shlib("libGL.so.1")
dynamic.add_shlib("libSDL-1.2.so.0")
dynamic.add_symtab(0)
dynamic.add_debug()
data_segment.add_content(dynamic)
text_segment.add_content(dynamic.strtab)


# Find interesting sections in the input files: only SHF_ALLOC sections
# end up in the output.
for i in infiles:
  for sh in i.shdrs:
    if (sh.sh_flags & SHF_ALLOC):
      if (sh.sh_flags & SHF_EXECINSTR):
        text_segment.add_content(sh.content)
      else: # No exec, it's for .data
        if (sh.sh_type == SHT_NOBITS):
          data_segment.add_nobits(sh.content)
        else:
          data_segment.add_content(sh.content)


outfile.layout(base_vaddr=0x400000)


# Set addresses, sizes, etc. where known
outfile.header.e_phnum = len(outfile.phdrs)
outfile.header.e_phoff = outfile.phdrs[0].file_offset

ph_text.p_offset = text_segment.file_offset
ph_text.p_vaddr = text_segment.virt_addr
ph_text.p_filesz = text_segment.physical_size
ph_text.p_memsz = text_segment.logical_size

ph_data.p_offset = data_segment.file_offset
ph_data.p_vaddr = data_segment.virt_addr
ph_data.p_filesz = data_segment.physical_size
ph_data.p_memsz = data_segment.logical_size

ph_interp.p_offset = interp.file_offset
ph_interp.p_vaddr = interp.virt_addr
ph_interp.p_filesz = interp.physical_size
ph_interp.p_memsz = interp.logical_size

ph_dynamic.p_offset = dynamic.file_offset
ph_dynamic.p_vaddr = dynamic.virt_addr
ph_dynamic.p_filesz = dynamic.physical_size
ph_dynamic.p_memsz = dynamic.logical_size

for i in infiles:
  outfile.undefined_symbols.extend(i.undefined_symbols)

# Linker-provided symbols.
dt_dbg = dynamic.dt_debug_address
outfile.global_symbols["_dt_debug"] = dt_dbg
outfile.global_symbols["_DYNAMIC"] = dynamic.virt_addr

# Take all globally declared symbols, and put them in outfile's dict.
# Each input entry is a (section header, offset in section) pair; the final
# address is the laid-out section address plus that offset.
for i in infiles:
  for s in i.global_symbols:
    section_addr = i.global_symbols[s][0].content.virt_addr
    addr = section_addr + i.global_symbols[s][1]
    if s in outfile.global_symbols:
      # Fixed: the symbol name is now actually interpolated in the message.
      # Single-argument print(...) is valid on both Python 2 and Python 3.
      print("Symbol '%s' defined more than once." % s)
      sys.exit(1)
    outfile.global_symbols[s] = addr

for i in infiles:
  i.apply_relocation(outfile.global_symbols)

_start = outfile.global_symbols["_start"]
outfile.header.e_entry = _start

# outfile.apply_global_relocation()

# Serialise first, so that a failure while building the image does not leave
# a truncated "prout" behind, and always close the file.
image = outfile.toBinArray()
f = open("prout", "wb")
try:
  image.tofile(f)
finally:
  f.close()


# --- patch boundary: new file elf/BinArray.py (mode 100644, index 0000000..ea88cad) ---
# -*- coding: utf-8 -*-

# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;

# Copyright (C) 2009 Amand 'alrj' Tihon
#
# This file is part of bold, the Byte Optimized Linker.
# Heavily inspired by elf.h from the GNU C Library.
#
# You can redistribute this file and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software
# Foundation, version 2.1.

from array import array
import struct

class BinArray(array):
  """A specialized array that contains bytes (typecode "B")."""
  def __new__(cls, data=None):
    # Use cls rather than a hard-coded BinArray so subclasses get instances
    # of their own type.
    if data:
      return array.__new__(cls, "B", data)
    else:
      return array.__new__(cls, "B")


# --- patch boundary: new file elf/__init__.py (mode 100644, index 0000000..c25fb45) ---
# -*- coding: utf-8 -*-

# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;

# Copyright (C) 2009 Amand 'alrj' Tihon
#
# This file is part of bold, the Byte Optimized Linker.
# Heavily inspired by elf.h from the GNU C Library.
#
# You can redistribute this file and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software
# Foundation, version 2.1.
# --- patch boundary: new file elf/constants.py (mode 100644, index 0000000..72d0100) ---
# -*- coding: utf-8 -*-

# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;

# Copyright (C) 2009 Amand 'alrj' Tihon
#
# This file is part of bold, the Byte Optimized Linker.
# Heavily inspired by elf.h from the GNU C Library.
#
# You can redistribute this file and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software
# Foundation, version 2.1.

"""This file defines standard ELF constants."""

# Python 2 has a separate `long` type; on Python 3 plain `int` plays that
# role. The alias is private so `from constants import *` does not leak it.
try:
  _integer_base = long
except NameError:
  _integer_base = int


class SymbolicConstant(_integer_base):
  """Allows you to map a symbolic name with a given integer.

  Instances behave as integers; str() yields the registered symbolic name.
  Every subclass declares its own `_symbolics` dict so that the same numeric
  value can carry a different name in each family of constants.
  """
  _symbolics = {}
  # Optional "%"-format used by str() for values without a registered name.
  _default = None

  def __new__(cls, value, symbolic=None):
    """Create the constant and, if `symbolic` is given, register its name."""
    if symbolic:
      cls._symbolics[value] = symbolic
    return _integer_base.__new__(cls, value)

  def __str__(self):
    """Return the symbolic name, else the `_default` rendering, else digits."""
    value = _integer_base(self)
    if value in self._symbolics:
      return self._symbolics[value]
    elif self._default:
      return self._default % value
    else:
      return str(value)


class ElfClass(SymbolicConstant):
  _symbolics = {}
ELFCLASSNONE = ElfClass(0, "Invalid ELF class")
ELFCLASS32 = ElfClass(1, "ELF32")
ELFCLASS64 = ElfClass(2, "ELF64")


class ElfData(SymbolicConstant):
  _symbolics = {}
ELFDATANONE = ElfData(0, "Invalid data encoding")
ELFDATA2LSB = ElfData(1, "Little endian")
ELFDATA2MSB = ElfData(2, "Big endian")


class ElfVersion(SymbolicConstant):
  _symbolics = {}
EV_NONE = ElfVersion(0, "Invalid ELF version")
EV_CURRENT = ElfVersion(1, "Current version (1)")


class ElfOsAbi(SymbolicConstant):
  _symbolics = {}
# Fill me
ELFOSABI_NONE = ElfOsAbi(0, "UNIX - System V")
ELFOSABI_SYSV = ElfOsAbi(0, "UNIX - System V")


class ElfType(SymbolicConstant):
  _symbolics = {}
ET_NONE = ElfType(0, "No file type")
ET_REL = ElfType(1, "Relocatable file")
ET_EXEC = ElfType(2, "Executable file")
ET_DYN = ElfType(3, "Shared object file")
ET_CORE = ElfType(4, "Core file")


class ElfMachine(SymbolicConstant):
  _symbolics = {}
# Fill me
EM_NONE = ElfMachine(0, "No machine")
EM_386 = ElfMachine(3, "Intel 80386")
EM_X86_64 = ElfMachine(62, "AMD x86-64 architecture")


class ElfSectionIndex(SymbolicConstant):
  _symbolics = {}
SHN_UNDEF = ElfSectionIndex(0, "UND")
SHN_ABS = ElfSectionIndex(0xfff1, "ABS")
SHN_COMMON = ElfSectionIndex(0xfff2, "COM")


class ElfShType(SymbolicConstant):
  _symbolics = {}
SHT_NULL = ElfShType(0, "NULL")
SHT_PROGBITS = ElfShType(1, "PROGBITS")
SHT_SYMTAB = ElfShType(2, "SYMTAB")
SHT_STRTAB = ElfShType(3, "STRTAB")
SHT_RELA = ElfShType(4, "RELA")
SHT_HASH = ElfShType(5, "HASH")
SHT_DYNAMIC = ElfShType(6, "DYNAMIC")
SHT_NOTE = ElfShType(7, "NOTE")
SHT_NOBITS = ElfShType(8, "NOBITS")
SHT_REL = ElfShType(9, "REL")
SHT_SHLIB = ElfShType(10, "SHLIB")
SHT_DYNSYM = ElfShType(11, "DYNSYM")

# Section header flags (sh_flags bits).
SHF_WRITE = 0x1
SHF_ALLOC = 1 << 1
SHF_EXECINSTR = 1 << 2
SHF_MERGE = 1 << 4
SHF_STRINGS = 1 << 5
SHF_INFO_LINK = 1 << 6
SHF_LINK_ORDER = 1 << 7
SHF_OS_NONCONFORMING = 1 << 8
SHF_GROUP = 1 << 9
SHF_TLS = 1 << 10
# Fixed: both masks were one hex digit short (0x0f00000 / 0xf000000).
SHF_MASKOS = 0x0ff00000
SHF_MASKPROC = 0xf0000000

STN_UNDEF = 0


class ElfSymbolBinding(SymbolicConstant):
  _symbolics = {}
STB_LOCAL = ElfSymbolBinding(0, "LOCAL")
STB_GLOBAL = ElfSymbolBinding(1, "GLOBAL")
STB_WEAK = ElfSymbolBinding(2, "WEAK")


class ElfSymbolType(SymbolicConstant):
  _symbolics = {}
STT_NOTYPE = ElfSymbolType(0, "NOTYPE")
STT_OBJECT = ElfSymbolType(1, "OBJECT")
STT_FUNC = ElfSymbolType(2, "FUNC")
STT_SECTION = ElfSymbolType(3, "SECTION")
STT_FILE = ElfSymbolType(4, "FILE")
STT_COMMON = ElfSymbolType(5, "COMMON")
STT_TLS = ElfSymbolType(6, "TLS")


class ElfSymbolVisibility(SymbolicConstant):
  _symbolics = {}
STV_DEFAULT = ElfSymbolVisibility(0, "DEFAULT")
STV_INTERNAL = ElfSymbolVisibility(1, "INTERN")
STV_HIDDEN = ElfSymbolVisibility(2, "HIDDEN")
STV_PROTECTED = ElfSymbolVisibility(3, "PROTECTED")


class ElfPhType(SymbolicConstant):
  _symbolics = {}
PT_NULL = ElfPhType(0, "NULL")
PT_LOAD = ElfPhType(1, "LOAD")
PT_DYNAMIC = ElfPhType(2, "DYNAMIC")
PT_INTERP = ElfPhType(3, "INTERP")
PT_NOTE = ElfPhType(4, "NOTE")
PT_SHLIB = ElfPhType(5, "SHLIB")
PT_PHDR = ElfPhType(6, "PHDR")
PT_TLS = ElfPhType(7, "TLS")

# Segment permission flags (p_flags bits).
PF_X = (1 << 0)
PF_W = (1 << 1)
PF_R = (1 << 2)


class ElfDynamicType(SymbolicConstant):
  _symbolics = {}
  _default = "Unknown (0x%x)"
DT_NULL = ElfDynamicType(0, "NULL")
DT_NEEDED = ElfDynamicType(1, "NEEDED")
DT_PLTRELSZ = ElfDynamicType(2, "PLTRELSZ")
DT_PLTGOT = ElfDynamicType(3, "PLTGOT")
DT_HASH = ElfDynamicType(4, "HASH")
DT_STRTAB = ElfDynamicType(5, "STRTAB")
DT_SYMTAB = ElfDynamicType(6, "SYMTAB")
DT_RELA = ElfDynamicType(7, "RELA")
DT_RELASZ = ElfDynamicType(8, "RELASZ")
DT_RELAENT = ElfDynamicType(9, "RELAENT")
DT_STRSZ = ElfDynamicType(10, "STRSZ")
DT_SYMENT = ElfDynamicType(11, "SYMENT")
DT_INIT = ElfDynamicType(12, "INIT")
DT_FINI = ElfDynamicType(13, "FINI")
DT_SONAME = ElfDynamicType(14, "SONAME")
DT_RPATH = ElfDynamicType(15, "RPATH")
DT_SYMBOLIC = ElfDynamicType(16, "SYMBOLIC")
DT_REL = ElfDynamicType(17, "REL")
DT_RELSZ = ElfDynamicType(18, "RELSZ")
DT_RELENT = ElfDynamicType(19, "RELENT")
DT_PLTREL = ElfDynamicType(20, "PLTREL")
DT_DEBUG = ElfDynamicType(21, "DEBUG")
DT_TEXTREL = ElfDynamicType(22, "TEXTREL")
DT_JMPREL = ElfDynamicType(23, "JMPREL")
DT_BIND_NOW = ElfDynamicType(24, "BIND_NOW")
DT_INIT_ARRAY = ElfDynamicType(25, "INIT_ARRAY")
DT_FINI_ARRAY = ElfDynamicType(26, "FINI_ARRAY")
DT_INIT_ARRAYSZ = ElfDynamicType(27, "INIT_ARRAYSZ")
DT_FINI_ARRAYSZ = ElfDynamicType(28, "FINI_ARRAYSZ")
DT_RUNPATH = ElfDynamicType(29, "RUNPATH")
DT_FLAGS = ElfDynamicType(30, "FLAGS")
# Fixed: DT_ENCODING is 32 (it was 31 here). It shares its number with
# DT_PREINIT_ARRAY, so it is created without a label and value 32 keeps
# printing as "PREINIT_ARRAY".
DT_ENCODING = ElfDynamicType(32)
DT_PREINIT_ARRAY = ElfDynamicType(32, "PREINIT_ARRAY")
DT_PREINIT_ARRAYSZ = ElfDynamicType(33, "PREINIT_ARRAYSZ")


# AMD x86-64 relocations
class Amd64Relocation(SymbolicConstant):
  _symbolics = {}

R_X86_64_NONE = Amd64Relocation(0, "NONE")
R_X86_64_64 = Amd64Relocation(1, "64")
R_X86_64_PC32 = Amd64Relocation(2, "PC32")
R_X86_64_GOT32 = Amd64Relocation(3, "GOT32")
R_X86_64_PLT32 = Amd64Relocation(4, "PLT32")
R_X86_64_COPY = Amd64Relocation(5, "COPY")
R_X86_64_GLOB_DAT = Amd64Relocation(6, "GLOB_DAT")
R_X86_64_JUMP_SLOT = Amd64Relocation(7, "JUMP_SLOT")
R_X86_64_RELATIVE = Amd64Relocation(8, "RELATIVE")
R_X86_64_GOTPCREL = Amd64Relocation(9, "GOTPCREL")
R_X86_64_32 = Amd64Relocation(10, "32")
R_X86_64_32S = Amd64Relocation(11, "32S")
R_X86_64_16 = Amd64Relocation(12, "16")
R_X86_64_PC16 = Amd64Relocation(13, "PC16")
R_X86_64_8 = Amd64Relocation(14, "8")
R_X86_64_PC8 = Amd64Relocation(15, "PC8")
R_X86_64_DTPMOD64 = Amd64Relocation(16, "DTPMOD64")
R_X86_64_DTPOFF64 = Amd64Relocation(17, "DTPOFF64")
R_X86_64_TPOFF64 = Amd64Relocation(18, "TPOFF64")
R_X86_64_TLSGD = Amd64Relocation(19, "TLSGD")
R_X86_64_TLSLD = Amd64Relocation(20, "TLSLD")
R_X86_64_DTPOFF32 = Amd64Relocation(21, "DTPOFF32")
R_X86_64_GOTTPOFF = Amd64Relocation(22, "GOTTPOFF")
R_X86_64_TPOFF32 = Amd64Relocation(23, "TPOFF32")


# --- patch boundary: new file elf/elf.py (mode 100644, index 0000000..d90e81e) ---
# -*- coding: utf-8 -*-

# kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;

# Copyright (C) 2009 Amand 'alrj' Tihon
#
# This file is part of bold, the Byte Optimized Linker.
# Heavily inspired by elf.h from the GNU C Library.
#
# You can redistribute this file and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software
# Foundation, version 2.1.
+ + +from BinArray import BinArray +from constants import * +import struct + +# Helpful decorator +def nested_property(c): + return property(**c()) + +#-------------------------------------------------------------------------- +# Elf +#-------------------------------------------------------------------------- + +class Elf64(object): + """Handles an Elf64 object.""" + interpreter = "/lib64/ld-linux-x86-64.so.2" + + def __init__(self, path=None): + object.__init__(self) + self.header = Elf64_Ehdr() + self.header.owner = self + self.shdrs = [] + self.phdrs = [] + self.shlibs = [] + self.sections = {} + self.segments = [] + self.local_symbols = {} + self.global_symbols = {} + self.undefined_symbols = [] + + if path: + self.fromfile(path) + + def fromfile(self, path): + f = file(path, "rb") + + # Load Elf header + data = BinArray() + data.fromfile(f, Elf64_Ehdr.size) + self.header.fromBinArray(data) + + # Load sections headers + f.seek(self.header.e_shoff) + for i in range(self.header.e_shnum): + data = BinArray() + data.fromfile(f, self.header.e_shentsize) + h = Elf64_Shdr(i, data) + h.owner = self + self.shdrs.append(h) + + # Read sections content + for sh in self.shdrs: + data = BinArray() + if sh.sh_type != SHT_NOBITS: + f.seek(sh.sh_offset) + data.fromfile(f, sh.sh_size) + sh.content = data + + f.close() + + def resolve_names(self): + # The .shstrtab index is in Elf Header. 
find the sections names + strtab = self.shdrs[self.header.e_shstrndx].content + + for sh in self.shdrs: + sh.name = strtab[int(sh.sh_name)] + self.sections[sh.name] = sh + + # And resolve names in the section itself + sh.resolve_names() + + + def find_symbols(self): + for sh in self.shdrs: + if sh.sh_type == SHT_SYMTAB: + symtab = sh.content.symtab + + for symbol in symtab: + if symbol.st_type == STT_FILE: + continue + if symbol.st_shndx == SHN_ABS: + continue + if symbol.st_shndx == SHN_UNDEF: + if symbol.name: + self.undefined_symbols.append(symbol.name) + continue + + target_section = self.shdrs[symbol.st_shndx] + + symbol_name = symbol.name + value = symbol.st_value + bind = symbol.st_binding + + # We got a name, a target section, and an offset in the section + if symbol.st_binding == STB_LOCAL: + if symbol.st_type == STT_SECTION: + symbol_name = target_section.name + self.local_symbols[symbol_name] = (target_section, value) + else: + self.global_symbols[symbol_name] = (target_section, value) + + def apply_relocation(self, all_global_symbols): + # find relocation tables + relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]] + for sh in relocations: + target = sh.target.content + + for reloc in sh.content.relatab: + + if reloc.symbol.st_shndx == SHN_UNDEF: + # This is an extern symbol, find it in all_global_symbols + sym_address = all_global_symbols[reloc.symbol.name] + print "0x%x" % sym_address + else: + # source == in which section it is defined + source = self.shdrs[reloc.symbol.st_shndx].content + sym_address = source.virt_addr + reloc.symbol.st_value + + target_ba = target.data # The actual BinArray that we'll modify + pc_address = target.virt_addr + reloc.r_offset + + if reloc.r_type == R_X86_64_64: + format = "> 4) & 0x0f) + def fset(self, value): + self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f)) + return locals() + + @nested_property + def st_type(): + def fget(self): + return ElfSymbolType(self.st_info & 0x0f) + 
def fset(self, value): + self.st_info = ((self.st_info & 0xf0) | (value & 0x0f)) + return locals() + + @nested_property + def st_visibility(): + def fget(self): + return ElfSymbolVisibility(self.st_other & 0x03) + def fset(self, value): + self.st_other = ((self.st_other & 0xfc) | (value & 0x03)) + return locals() + + def fromBinArray(self, rawdata): + t = struct.unpack(self.format, rawdata) + self.st_name = t[0] # index in the strtab pointed by sh_link + self.st_info = t[1] + self.st_other = t[2] + self.st_shndx = ElfSectionIndex(t[3]) + self.st_value = t[4] + self.st_size = t[5] + + +class Elf64_Rel(object): + format = "<2Q" + def __init__(self, rawdata=None): + object.__init__(self) + self.r_addend = 0 # No addend in a Rel. + if rawdata: + self.fromBinArray(rawdata) + + def fromBinArray(sef, rawdata): + t = struct.unpack(self.format, rawdata) + self.r_offset = t[0] + self.r_info = t[1] + + @nested_property + def r_sym(): + def fget(self): + return (self.r_info >> 32) & 0xffffffff + def fset(self, value): + self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff) + return locals() + + @nested_property + def r_type(): + def fget(self): + return Amd64Relocation(self.r_info & 0xffffffff) + def fset(self, value): + self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff) + return locals() + + +class Elf64_Rela(Elf64_Rel): + format = "<2Q q" + def __init__(self, rawdata=None): + Elf64_Rel.__init__(self, rawdata) + + def fromBinArray(self, rawdata): + t = struct.unpack(self.format, rawdata) + self.r_offset = t[0] + self.r_info = t[1] + self.r_addend = t[2] + + +class Elf64_Dyn(object): + format = "<2Q" + size = struct.calcsize(format) + def __init__(self, tag, value): + object.__init__(self) + self.d_tag = tag + self.d_val = value + + @nested_property + def d_ptr(): + def fget(self): + return self.d_val + def fset(self, value): + self.d_val = value + return locals() + + +# Sections types : + +def Section(shdr, data=None): + """A section 
factory""" + dataclass = { + SHT_NULL: SNull, + SHT_PROGBITS: SProgBits, + SHT_SYMTAB: SSymtab, + SHT_STRTAB: SStrtab, + SHT_RELA: SRela, + SHT_HASH: SHash, + SHT_DYNAMIC: SDynamic, + SHT_NOTE: SNote, + SHT_NOBITS: SNobits, + SHT_REL: SRel, + SHT_SHLIB: SShlib, + SHT_DYNSYM: SDynsym + } + if shdr.sh_type in dataclass: + return dataclass[shdr.sh_type](shdr, data) + else: + return BaseSection(shdr, data) + + +class BaseSection(object): + def __init__(self, shdr, data=None): + object.__init__(self) + self.data = None + self.header = shdr + if data: + self.fromBinArray(data) + + def fromBinArray(self, data): + self.data = data + + def toBinArray(self): + if self.data: + return self.data + else: + return BinArray() + + def resolve_names(self, elf): + """Nothing to resolve.""" + pass + + @nested_property + def size(): + def fget(self): + return len(self.data) + return locals() + physical_size = size + logical_size = size + + def layout(self): + pass + + +class SNull(BaseSection): + def __init__(self, shdr, data=None): + BaseSection.__init__(self, shdr, None) + + +class SProgBits(BaseSection): + def __init__(self, shdr, data=None): + BaseSection.__init__(self, shdr, data) + + +class SSymtab(BaseSection): + entsize = struct.calcsize(Elf64_Sym.format) + def __init__(self, shdr, data=None): + self.symtab = [] + BaseSection.__init__(self, shdr, data) + + def fromBinArray(self, data): + BaseSection.fromBinArray(self, data) + nument = len(data) / self.entsize + for i in range(nument): + start = i * self.entsize + end = i * self.entsize + self.entsize + self.symtab.append(Elf64_Sym(data[start:end])) + + def resolve_names(self, elf): + # For a symtab, the strtab is indicated by sh_link + strtab = elf.shdrs[self.header.sh_link].content + # Resolve for all symbols in the table + for sym in self.symtab: + sym.name = strtab[sym.st_name] + + def __getitem__(self, key): + return self.symtab[key] + + +class SStrtab(BaseSection): + def __init__(self, shdr, data=None): + self.strtab = {} 
+ BaseSection.__init__(self, shdr, data) + + def fromBinArray(self, data): + BaseSection.fromBinArray(self, data) + itab = data.tostring().split('\0') + i = 0 + for sname in itab: + self.strtab[i] = sname + i += len(sname) + 1 + + def __getitem__(self, key): + if key in self.strtab: + return self.strtab[key] + else: + v = self.data[key:].tostring().split('\0')[0] + self.strtab[key] = v + return v + + def iteritems(self): + return self.strtab.iteritems() + + +class SRela(BaseSection): + entsize = struct.calcsize(Elf64_Rela.format) + def __init__(self, shdr, data=None): + self.relatab = [] + BaseSection.__init__(self, shdr, data) + + def fromBinArray(self, data): + BaseSection.fromBinArray(self, data) + nument = len(data) / self.entsize + for i in range(nument): + start = i * self.entsize + end = i * self.entsize + self.entsize + self.relatab.append(Elf64_Rela(data[start:end])) + + def resolve_names(self, elf): + """Badly named, this wil resolve to a symtab entry...""" + # sh_link leads to the symtab + self.symtab = elf.shdrs[self.header.sh_link].content + # sh_info links to the section on which the relocation applies + self.header.target = elf.shdrs[self.header.sh_info] + for r in self.relatab: + r.symbol = self.symtab[r.r_sym] + + + +class SHash(BaseSection): + pass + + +class SDynamic(BaseSection): + pass + + +class SNote(BaseSection): + pass + + +class SNobits(BaseSection): + size = 0 + physical_size = 0 + + @nested_property + def logical_size(): + def fget(self): + return self.header.sh_size + return locals() + + def toBinArray(self): + return BinArray() + +class SRel(BaseSection): + pass + + +class SShlib(BaseSection): + pass + + +class SDynsym(SSymtab): + pass + + +class Elf64_Phdr(object): + format = "<2I 6Q" + size = struct.calcsize(format) + physical_size = size + logical_size = size + + def __init__(self): + object.__init__(self) + self.p_type = PT_NULL + self.p_flags = PF_X + PF_W + PF_R + self.p_offset = 0 + self.p_vaddr = 0 + self.p_paddr = 0 + 
self.p_filesz = 0 + self.p_memsz = 0 + self.p_align = 1 + #self.content = [] + #self.nobits = [] + + def toBinArray(self): + res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset, + self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align) + return BinArray(res) + + def layout(self): + pass + + #def add_content(self, content): + # self.content.append(content) + + #def add_empty_content(self, content): + # self.nobits.append(content) + + #@nested_property + #def content_size(): + # def fget(self): + # return sum(s.sh_size for s in self.content) + # return locals() + + +class BaseSegment(object): + def __init__(self, align=0): + object.__init__(self) + self.align = align + self.content = [] + + def add_content(self, content): + self.content.append(content) + + def toBinArray(self): + ba = BinArray() + for c in self.content: + ba.extend(c.toBinArray()) + return ba + + @nested_property + def size(): + def fget(self): + return sum(c.size for c in self.content) + return locals() + physical_size = size + logical_size = size + + +class TextSegment(BaseSegment): + def __init__(self, align=0): + BaseSegment.__init__(self, align) + + def layout(self): + virt_addr = self.virt_addr + file_offset = self.file_offset + for i in self.content: + i.virt_addr = virt_addr + i.file_offset = file_offset + i.layout() + virt_addr += i.logical_size + file_offset += i.physical_size + + +class DataSegment(BaseSegment): + def __init__(self, align=0): + BaseSegment.__init__(self, align) + self.nobits = [] + + def add_nobits(self, content): + self.nobits.append(content) + + def layout(self): + virt_addr = self.virt_addr + file_offset = self.file_offset + for i in self.content: + i.virt_addr = virt_addr + i.file_offset = file_offset + i.layout() + virt_addr += i.logical_size + file_offset += i.physical_size + for i in self.nobits: + i.virt_addr = virt_addr + i.file_offset = 0 + i.layout() + virt_addr += i.logical_size + + @nested_property + def logical_size(): + def 
fget(self): + return self.physical_size + sum(c.logical_size for c in self.nobits) + return locals() + + + +class PStrtab(object): + def __init__(self): + object.__init__(self) + self.table = [] + self.virt_addr = None + + def append(self, string): + if len(self.table): + offset = self.table[-1][0] + offset += len(self.table[-1][1]) + else: + offset = 0 + new_str = string + '\0' + self.table.append((offset, new_str)) + return offset + + @nested_property + def size(): + def fget(self): + return (self.table[-1][0] + len(self.table[-1][1])) + return locals() + physical_size = size + logical_size = size + + def toBinArray(self): + ba = BinArray() + for s in (i[1] for i in self.table): + ba.fromstring(s) + return ba + + def layout(self): + pass + + +class Dynamic(object): + def __init__(self): + object.__init__(self) + self.dyntab = [] + self.strtab = PStrtab() + + @nested_property + def size(): + def fget(self): + # End the table with a DT_NULL without associated value. + return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q")) + return locals() + physical_size = size + logical_size = size + + def add_shlib(self, shlib): + offset = self.strtab.append(shlib) + self.dyntab.append((DT_NEEDED, offset)) + + def add_symtab(self, vaddr): + self.dyntab.append((DT_SYMTAB, vaddr)) + + def add_debug(self): + self.dyntab.append((DT_DEBUG, 0)) + + def layout(self): + # Adjust the address of the strtab, if + if self.strtab.virt_addr is None: + print "Ooops, strtab's address is not known yet. Aborting." + exit(1) + else: + self.dyntab.append((DT_STRTAB, self.strtab.virt_addr)) + + @nested_property + def dt_debug_address(): + def fget(self): + for i, d in enumerate(self.dyntab): + if d[0] == DT_DEBUG: + return self.virt_addr + (i*16 + 8) + return locals() + + + def toBinArray(self): + ba = BinArray() + for i in self.dyntab: + s = struct.pack("<2Q", i[0], i[1]) + ba.fromstring(s) + null = struct.pack("