X-Git-Url: https://git.alrj.org/?p=bold.git;a=blobdiff_plain;f=Bold%2Flinker.py;h=625a28d995de623e54e1c14435b004443e7d59fc;hp=757bb1d12fcecb463f8aa28889e7ee97cbd22622;hb=2f6e3bc47112f6d0fc38f4898752a10d16630ba6;hpb=7eeaa837d3a6f29f9312bf29962214e709663e52 diff --git a/Bold/linker.py b/Bold/linker.py index 757bb1d..625a28d 100644 --- a/Bold/linker.py +++ b/Bold/linker.py @@ -1,4 +1,3 @@ -#! /usr/bin/python # -*- coding: utf-8 -*- # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python; @@ -7,27 +6,37 @@ # This file is part of bold, the Byte Optimized Linker. # # You can redistribute this file and/or modify it under the terms of the -# GNU Lesser General Public License as published by the Free Software -# Foundation, version 2.1. +# GNU General Public License as published by the Free Software Foundation, +# either version 3 of the License or (at your option) any later version. """ Main entry point for the bold linker. """ from constants import * -from elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter +from BinArray import BinArray +from elf import Elf64, Elf64_Phdr, Elf64_Shdr, TextSegment, DataSegment +from elf import SStrtab, SSymtab, SProgBits, SNobits, Dynamic, Interpreter from errors import * +from ctypes import CDLL from ctypes.util import find_library +import struct + + +def hash_name(name): + """Caculate the hash of the function name. + @param name: the string to hash + @return: 32 bits hash value. + """ + h = 0 + for c in name: + h = ((h * 0x21) ^ ord(c)) & 0xffffffff + return h + class BoldLinker(object): """A Linker object takes one or more objects files, optional shared libs, and arranges all this in an executable. - - Important note: the external functions from the libraries are NOT resolved. - This import is left to the user, as it can be done more efficiently by hash. - (http://www.linuxdemos.org/contentarticle/how_to_start_4k_introdev_with_ibh) - For this, a very useful symbol is exported, : _dt_debug, the address of the - DT_DEBUG's d_ptr. """ def __init__(self): @@ -37,14 +46,177 @@ class BoldLinker(object): self.shlibs = [] self.entry_point = "_start" self.output = Elf64() + self.global_symbols = {} + self.undefined_symbols = set() + self.common_symbols = set() + def add_object(self, filename): - """Add a relocatable file as input.""" + """Add a relocatable file as input. + @param filename: path to relocatable object file to add + """ obj = Elf64(filename) obj.resolve_names() obj.find_symbols() self.objs.append(obj) + + def build_symbols_tables(self): + """Find out the globally available symbols, as well as the globally + undefined ones (which should be found in external libraries.""" + + # Gather the "extern" and common symbols from each input files. + for i in self.objs: + self.undefined_symbols.update(i.undefined_symbols) + self.common_symbols.update(i.common_symbols) + + # Make a dict with all the symbols declared globally. + # Key is the symbol name, value will later be set to the final + # virtual address. Currently, we're only interrested in the declaration. + # The virtual addresses are set to None, they'll be resolved later. + for i in self.objs: + for s in i.global_symbols: + if s in self.global_symbols: + raise RedefinedSymbol(s) + self.global_symbols[s] = None + + # Add a few useful symbols. They'll be resolved ater as well. + self.global_symbols["_dt_debug"] = None + self.global_symbols["_DYNAMIC"] = None + + # Find out which symbols aren't really defined anywhere + self.undefined_symbols.difference_update(self.global_symbols) + + # A symbol declared as COMMON in one object may very well have been + # defined in another. In this case, it will be present in the + # global_symbols. + # Take a copy because we can't change the set's size inside the loop + for i in self.common_symbols.copy(): + if i[0] in self.global_symbols: + self.common_symbols.remove(i) + + + def build_external(self, with_jump=False, align_jump=False): + """ + Generate a fake relocatable object, for dynamic linking. + This object is then automatically added in the list of ebjects to link. + TODO: This part is extremely non-portable. + """ + + # Find out all the undefined symbols. They're the one we'll need to resolve + # dynamically. + symbols = sorted(list(self.undefined_symbols)) + + # Those three will soon be known... + symbols.remove('_bold__functions_count') + symbols.remove('_bold__functions_hash') + symbols.remove('_bold__functions_pointers') + + # Create the fake ELF object. + fo = Elf64() # Don't care about most parts of ELF header (?) + fo.filename = "Internal dynamic linker" + + # We need a .data section, a .bss section and a possibly a .text section + data_shdr = Elf64_Shdr() + data_shdr.sh_type = SHT_PROGBITS + data_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC) + data_shdr.sh_size = len(symbols) * 4 + fmt = "<" + "I" * len(symbols) + data_shdr.content = BinArray(struct.pack(fmt, *[hash_name(s) for s in symbols])) + fo.shdrs.append(data_shdr) + fo.sections['.data'] = data_shdr + + # .bss will contain pointers to resolved external functions, as well as + # the COMMON symbols (from C tentative declaration). + bss_size = len(symbols) * 8 + for s_name, s_size, s_alignment in self.common_symbols: + padding = (s_alignment - (bss_size % s_alignment)) + bss_size += padding + s_size + + bss_shdr = Elf64_Shdr() + bss_shdr.sh_type = SHT_NOBITS + bss_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC) + bss_shdr.sh_size = bss_size + bss_shdr.content = BinArray("") + fo.shdrs.append(bss_shdr) + fo.sections['.bss'] = bss_shdr + + if with_jump: + text_shdr = Elf64_Shdr() + text_shdr.sh_type = SHT_PROGBITS + text_shdr.sh_flags = (SHF_ALLOC | SHF_EXECINSTR) + text_shdr.sh_size = len(symbols) * 8 + if align_jump: + fmt = '\xff\x25\x00\x00\x00\x00\x00\x00' # ff 25 = jmp [rel label] + jmp_size = 8 + else: + fmt = '\xff\x25\x00\x00\x00\x00' + jmp_size = 6 + text_shdr.content = BinArray(fmt * len(symbols)) + fo.shdrs.append(text_shdr) + fo.sections['.text'] = text_shdr + + # Cheating here. All symbols declared as global so we don't need to create + # a symtab from scratch. + fo.global_symbols = {} + fo.global_symbols['_bold__functions_count'] = (SHN_ABS, len(symbols)) + fo.global_symbols['_bold__functions_hash'] = (data_shdr, 0) + fo.global_symbols['_bold__functions_pointers'] = (bss_shdr, 0) + + # The COMMON symbols. Assign an offset in .bss, declare as global. + bss_common_offset = len(symbols) * 8 + for s_name, s_size, s_alignment in self.common_symbols: + padding = (s_alignment - (bss_common_offset % s_alignment)) + bss_common_offset += padding + fo.global_symbols[s_name] = (bss_shdr, bss_common_offset) + bss_common_offset += s_size + + + for n, i in enumerate(symbols): + # The hash is always in .data + h = "_bold__hash_%s" % i + fo.global_symbols[h] = (data_shdr, n * 4) # Section, offset + + if with_jump: + # the symbol is in .text, can be called directly + fo.global_symbols[i] = (text_shdr, n * jmp_size) + # another symbol can be used to reference the pointer, just in case. + p = "_bold__%s" % i + fo.global_symbols[p] = (bss_shdr, n * 8) + + else: + # The symbol is in .bss, must be called indirectly + fo.global_symbols[i] = (bss_shdr, n * 8) + + if with_jump: + # Add relocation entries for the jumps + # Relocation will be done for the .text, for every jmp instruction. + class dummy: pass + rela_shdr = Elf64_Shdr() + rela_shdr.sh_type = SHT_RELA + rela_shdr.target = text_shdr + rela_shdr.sh_flags = 0 + rela_shdr._content = dummy() # We only need a container for relatab... + relatab = [] # Prepare a relatab + rela_shdr.content.relatab = relatab + + for n, i in enumerate(symbols): + # Create a relocation entry for each symbol + reloc = dummy() + reloc.r_offset = (n * jmp_size) + 2 # Beginning of the cell to update + reloc.r_addend = -4 + reloc.r_type = R_X86_64_PC32 + reloc.symbol = dummy() + reloc.symbol.st_shndx = SHN_UNDEF + reloc.symbol.name = "_bold__%s" % i + relatab.append(reloc) + fo.shdrs.append(rela_shdr) + fo.sections['.rela.text'] = rela_shdr + + # Ok, let's add this fake object + self.objs.append(fo) + + def add_shlib(self, libname): """Add a shared library to link against.""" # Note : we use ctypes' find_library to find the real name @@ -53,6 +225,27 @@ class BoldLinker(object): raise LibNotFound(libname) self.shlibs.append(fullname) + + def check_external(self): + """Verify that all globally undefined symbols are present in shared + libraries.""" + libs = [] + for libname in self.shlibs: + libs.append(CDLL(libname)) + + for symbol in self.undefined_symbols: + # Hackish ! Eek! + if symbol.startswith('_bold__'): + continue + found = False + for lib in libs: + if hasattr(lib, symbol): + found = True + break + if not found: + raise UndefinedSymbol(symbol) + + def link(self): """Do the actual linking.""" # Prepare two segments. One for .text, the other for .data + .bss @@ -145,47 +338,30 @@ class BoldLinker(object): ph_interp.update_from_content(interp) ph_dynamic.update_from_content(dynamic) - - # Gather the undefined symbols from all input files - undefined_symbols = set() - for i in self.objs: - undefined_symbols.update(i.undefined_symbols) - - # Make a dict with all the symbols declared globally. - # Key is the symbol name, value is the final virtual address - global_symbols = {} - + # All parts are at their final address, find out the symbols' addresses for i in self.objs: for s in i.global_symbols: - if s in global_symbols: - raise RedefinedSymbol(s) # Final address is the section's base address + the symbol's offset - addr = i.global_symbols[s][0].content.virt_addr - addr += i.global_symbols[s][1] - global_symbols[s] = addr - - # Add a few useful symbols - global_symbols["_dt_debug"] = dynamic.dt_debug_address - global_symbols["_DYNAMIC"] = dynamic.virt_addr - - # Find out which symbols aren't really defined anywhere - undefined_symbols.difference_update(global_symbols) - - # For now, it's an error. Later, we could try to find them in the shared - # libraries. - if len(undefined_symbols): - raise UndefinedSymbol(undefined_symbols.pop()) + if i.global_symbols[s][0] == SHN_ABS: + addr = i.global_symbols[s][1] + else: + addr = i.global_symbols[s][0].content.virt_addr + addr += i.global_symbols[s][1] + self.global_symbols[s] = addr + # Resolve the few useful symbols + self.global_symbols["_dt_debug"] = dynamic.dt_debug_address + self.global_symbols["_DYNAMIC"] = dynamic.virt_addr # We can now do the actual relocation for i in self.objs: - i.apply_relocation(global_symbols) + i.apply_relocation(self.global_symbols) # And update the ELF header with the entry point - if not self.entry_point in global_symbols: + if not self.entry_point in self.global_symbols: raise UndefinedSymbol(self.entry_point) - self.output.header.e_entry = global_symbols[self.entry_point] + self.output.header.e_entry = self.global_symbols[self.entry_point] # DONE ! @@ -193,6 +369,7 @@ class BoldLinker(object): def toBinArray(self): return self.output.toBinArray() + def tofile(self, file_object): return self.output.toBinArray().tofile(file_object)