self.undefined_symbols = []
if path:
+ self.filename = path
self.fromfile(path)
# Functions for relocatables files used as input
target = sh.target.content
for reloc in sh.content.relatab:
-
if reloc.symbol.st_shndx == SHN_UNDEF:
# This is an extern symbol, find it in all_global_symbols
sym_address = all_global_symbols[reloc.symbol.name]
If it's not given any argument, it can be used to create a new Strtab."""
def __init__(self, shdr=None, data=None):
self.readonly = (shdr is not None)
- self.strtab = {}
+ self.by_index = {}
+ self.by_name = {}
self.table = []
BaseSection.__init__(self, shdr, data)
self.virt_addr = None
return BaseSection.toBinArray()
ba = BinArray()
- keys = self.strtab.keys()
+ keys = self.by_index.keys()
keys.sort()
for k in keys:
- ba.fromstring(self.strtab[k] + "\0")
+ ba.fromstring(self.by_index[k] + "\0")
return ba
@nested_property
def size():
def fget(self):
- if self.readonly:
- return len(data)
- if len(self.strtab) == 0:
+ if len(self.by_index) == 0:
return 0
- return sum((len(x)+1 for x in self.strtab.values()))
+ return len(self.data)
return locals()
physical_size = size
logical_size = size
def iteritems(self):
- return self.strtab.iteritems()
+ return self.by_index.iteritems()
# Resolution functions
itab = data.tostring().split('\0')
i = 0
for sname in itab:
- self.strtab[i] = sname
+ self.by_index[i] = sname
+ self.by_name[sname] = i
i += len(sname) + 1
def __getitem__(self, key):
- if key in self.strtab:
- return self.strtab[key]
+ if isinstance(key, int):
+ # Find string by index
+ if key in self.by_index:
+ # Already computed, return it
+ return self.by_index[key]
+ else:
+ # It references a substring
+ v = self.data[key:].tostring().split('\0')[0]
+ self.by_index[key] = v
+ self.by_name[v] = key
+ return v
else:
- v = self.data[key:].tostring().split('\0')[0]
- self.strtab[key] = v
- return v
+ # find index by name
+ if key in self.by_name:
+ return self.by_name[key]
+ else:
+ raise KeyError(key)
# Executable creation functions
- def append(self, string):
- if len(self.strtab) == 0:
+ def append(self, identifier):
+ if len(self.by_name) == 0:
offset = 0
else:
- last = max(self.strtab.keys())
- offset = last + len(self.strtab[last]) + 1 # for the \0
- self.strtab[offset] = string
+ last = max(self.by_index.keys())
+ offset = last + len(self.by_index[last]) + 1 # for the \0
+ self.by_index[offset] = identifier
+ self.by_name[identifier] = offset
+ self.data = self.toBinArray()
return offset
def layout(self):
def toBinArray(self):
return BinArray()
+
class SRel(BaseSection):
pass
class Interpreter(object):
+ """
+ Pseudo-section containing the null terminated string referencing the
+ interpreter to use.
+
+ @ivar size: Read-only attribute, size of the null terminated string.
+ @ivar logical_size: alias to size
+ @ivar physical_size: alias to size
+ """
default_interpreter = "/lib64/ld-linux-x86-64.so.2"
def __init__(self, interpreter=None):
+ """
+ @param interpreter: The interpreter to use. Defaults to
+ "/lib64/ld-linux-x86-64.so.2", as per the specs.
+ @type interpreter: string
+ """
object.__init__(self)
if interpreter:
self.interpreter = interpreter
logical_size = size
def toBinArray(self):
+ """
+ @return: a L{BinArray} with the content of the pseudo-section.
+ """
ba = BinArray(self.interpreter)
ba.append(0)
return ba
def layout(self):
+ """
+ Unused.
+ """
pass
"""
from constants import *
-from elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter
+from BinArray import BinArray
+from elf import Elf64, Elf64_Phdr, Elf64_Shdr, TextSegment, DataSegment
+from elf import SStrtab, SSymtab, SProgBits, SNobits, Dynamic, Interpreter
from errors import *
from ctypes.util import find_library
+import struct
+
+def hash_name(name):
+    """Return the 32-bit hash of a symbol name.
+
+    Every character c updates the running hash h to
+    c - h + (h << 6) + (h << 16), truncated to 32 bits.
+    """
+    digest = 0
+    for char in name:
+        mixed = ord(char) - digest + (digest << 6) + (digest << 16)
+        digest = mixed & 0xffffffff
+    return digest
class BoldLinker(object):
"""A Linker object takes one or more objects files, optional shared libs,
self.shlibs = []
self.entry_point = "_start"
self.output = Elf64()
+ self.global_symbols = {}
+ self.undefined_symbols = set()
def add_object(self, filename):
"""Add a relocatable file as input."""
obj.find_symbols()
self.objs.append(obj)
+ def build_symbols_tables(self):
+     """Collect the globally defined and the still undefined symbols.
+
+     After this call, self.global_symbols maps every symbol declared
+     globally by an input object, plus "_dt_debug" and "_DYNAMIC", to None;
+     the final virtual addresses are filled in later. self.undefined_symbols
+     holds the names the inputs report as undefined and that none of them
+     declares globally (they should be found in external libraries).
+
+     Raises RedefinedSymbol when a global symbol name is declared twice.
+     """
+     # First pass: every name an input object reports as "extern".
+     for obj in self.objs:
+         self.undefined_symbols.update(obj.undefined_symbols)
+
+     # Second pass: register the global declarations. Only the declaration
+     # matters for now, so the address is left as None.
+     for obj in self.objs:
+         for name in obj.global_symbols:
+             if name in self.global_symbols:
+                 raise RedefinedSymbol(name)
+             self.global_symbols[name] = None
+
+     # A few useful symbols, resolved later like the others.
+     for name in ("_dt_debug", "_DYNAMIC"):
+         self.global_symbols[name] = None
+
+     # Whatever is declared globally is not undefined after all.
+     self.undefined_symbols.difference_update(self.global_symbols)
+
+
+ def build_external(self, with_jump=False, align_jump=True):
+     """
+     Generate a fake relocatable object, for dynamic linking.
+
+     The object is appended to self.objs. For every name in
+     self.undefined_symbols (taken in sorted order) it provides:
+       - a 32-bit little-endian hash_name() value in .data, exported as
+         "_bold__hash_<symbol>";
+       - an 8-byte slot in .bss, meant to hold the function pointer;
+       - when with_jump is set, a "jmp [rel ...]" stub in .text, relocated
+         against the symbol's slot.
+     It also defines _bold__functions_count, _bold__functions_hash and
+     _bold__functions_pointers, so these three never get an entry of
+     their own.
+
+     @param with_jump: also emit one jump stub per symbol in .text. The
+       symbol itself then names the stub and "_bold__<symbol>" names the
+       pointer slot; otherwise the symbol names the slot directly.
+     @type with_jump: bool
+     @param align_jump: make every stub 8 bytes long instead of 6.
+     @type align_jump: bool
+     """
+
+     # These three are defined by the generated object itself. They are not
+     # necessarily in the undefined set (e.g. when the import-by-hash object
+     # is not linked in), so filter them out instead of using list.remove(),
+     # which raises ValueError on a missing name.
+     provided = ('_bold__functions_count',
+                 '_bold__functions_hash',
+                 '_bold__functions_pointers')
+     symbols = sorted(s for s in self.undefined_symbols if s not in provided)
+
+     # Create the fake ELF object.
+     fo = Elf64() # Don't care about most parts of ELF header (?)
+     fo.filename = "Internal dynamic linker"
+
+     # .data: one 32-bit hash per symbol.
+     data_shdr = Elf64_Shdr()
+     data_shdr.sh_type = SHT_PROGBITS
+     data_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC)
+     data_shdr.sh_size = len(symbols) * 4
+     fmt = "<" + "I" * len(symbols)
+     data_shdr.content = BinArray(struct.pack(fmt, *[hash_name(s) for s in symbols]))
+     fo.shdrs.append(data_shdr)
+     fo.sections['.data'] = data_shdr
+
+     # .bss: one 8-byte pointer slot per symbol.
+     bss_shdr = Elf64_Shdr()
+     bss_shdr.sh_type = SHT_NOBITS
+     bss_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC)
+     bss_shdr.sh_size = len(symbols) * 8
+     bss_shdr.content = BinArray("")
+     fo.shdrs.append(bss_shdr)
+     fo.sections['.bss'] = bss_shdr
+
+     if with_jump:
+         # ff 25 = jmp [rel label]; the 32-bit displacement that follows the
+         # opcode is filled in by the relocations created below.
+         if align_jump:
+             jmp_code = '\xff\x25\x00\x00\x00\x00\x00\x00'
+         else:
+             jmp_code = '\xff\x25\x00\x00\x00\x00'
+         jmp_size = len(jmp_code)
+
+         # .text: one jump stub per symbol.
+         text_shdr = Elf64_Shdr()
+         text_shdr.sh_type = SHT_PROGBITS
+         text_shdr.sh_flags = (SHF_ALLOC | SHF_EXECINSTR)
+         # The size follows the stub actually emitted, so it always matches
+         # the content (it used to be 8 bytes per symbol even for 6-byte
+         # stubs).
+         text_shdr.sh_size = len(symbols) * jmp_size
+         text_shdr.content = BinArray(jmp_code * len(symbols))
+         fo.shdrs.append(text_shdr)
+         fo.sections['.text'] = text_shdr
+
+     # Cheating here. All symbols declared as global so we don't need to create
+     # a symtab from scratch. Values are (section, offset) pairs, or
+     # (SHN_ABS, value) for an absolute symbol.
+     fo.global_symbols = {}
+     fo.global_symbols['_bold__functions_count'] = (SHN_ABS, len(symbols))
+     fo.global_symbols['_bold__functions_hash'] = (data_shdr, 0)
+     fo.global_symbols['_bold__functions_pointers'] = (bss_shdr, 0)
+
+     for n, i in enumerate(symbols):
+         # The hash is always in .data
+         fo.global_symbols["_bold__hash_%s" % i] = (data_shdr, n * 4)
+
+         if with_jump:
+             # the symbol is in .text, can be called directly
+             fo.global_symbols[i] = (text_shdr, n * jmp_size)
+             # another symbol can be used to reference the pointer, just in case.
+             fo.global_symbols["_bold__%s" % i] = (bss_shdr, n * 8)
+         else:
+             # The symbol is in .bss, must be called indirectly
+             fo.global_symbols[i] = (bss_shdr, n * 8)
+
+     if with_jump:
+         # Add relocation entries for the jumps: one per jmp instruction in
+         # .text. Only the attributes set below are provided on these
+         # stand-in objects.
+         class dummy: pass
+         rela_shdr = Elf64_Shdr()
+         rela_shdr.sh_type = SHT_RELA
+         # sh_info is left unset; the target section is referenced directly.
+         rela_shdr.target = text_shdr
+         rela_shdr.sh_flags = 0
+         rela_shdr._content = dummy() # We only need a container for relatab...
+         relatab = []
+         rela_shdr.content.relatab = relatab
+
+         for n, i in enumerate(symbols):
+             # Create a relocation entry for each symbol
+             reloc = dummy()
+             # Skip the two opcode bytes: the cell to update starts there.
+             reloc.r_offset = (n * jmp_size) + 2
+             reloc.r_addend = -4
+             reloc.r_type = R_X86_64_PC32
+             reloc.symbol = dummy()
+             reloc.symbol.st_shndx = SHN_UNDEF
+             reloc.symbol.name = "_bold__%s" % i
+             relatab.append(reloc)
+         fo.shdrs.append(rela_shdr)
+         fo.sections['.rela.text'] = rela_shdr
+
+     # Ok, let's add this fake object
+     self.objs.append(fo)
+
+
def add_shlib(self, libname):
"""Add a shared library to link against."""
# Note : we use ctypes' find_library to find the real name
ph_interp.update_from_content(interp)
ph_dynamic.update_from_content(dynamic)
-
- # Gather the undefined symbols from all input files
- undefined_symbols = set()
- for i in self.objs:
- undefined_symbols.update(i.undefined_symbols)
-
- # Make a dict with all the symbols declared globally.
- # Key is the symbol name, value is the final virtual address
- global_symbols = {}
-
+ # All parts are at their final address, find out the symbols' addresses
for i in self.objs:
for s in i.global_symbols:
- if s in global_symbols:
- raise RedefinedSymbol(s)
# Final address is the section's base address + the symbol's offset
- addr = i.global_symbols[s][0].content.virt_addr
- addr += i.global_symbols[s][1]
- global_symbols[s] = addr
+ if i.global_symbols[s][0] == SHN_ABS:
+ addr = i.global_symbols[s][1]
+ else:
+ addr = i.global_symbols[s][0].content.virt_addr
+ addr += i.global_symbols[s][1]
- # Add a few useful symbols
- global_symbols["_dt_debug"] = dynamic.dt_debug_address
- global_symbols["_DYNAMIC"] = dynamic.virt_addr
+ self.global_symbols[s] = addr
- # Find out which symbols aren't really defined anywhere
- undefined_symbols.difference_update(global_symbols)
+ # Resolve the few useful symbols
+ self.global_symbols["_dt_debug"] = dynamic.dt_debug_address
+ self.global_symbols["_DYNAMIC"] = dynamic.virt_addr
# For now, it's an error. Later, we could try to find them in the shared
# libraries.
- if len(undefined_symbols):
- raise UndefinedSymbol(undefined_symbols.pop())
+ #if len(self.undefined_symbols):
+ # raise UndefinedSymbol(self.undefined_symbols.pop())
# We can now do the actual relocation
for i in self.objs:
- i.apply_relocation(global_symbols)
+ i.apply_relocation(self.global_symbols)
# And update the ELF header with the entry point
- if not self.entry_point in global_symbols:
+ if not self.entry_point in self.global_symbols:
raise UndefinedSymbol(self.entry_point)
- self.output.header.e_entry = global_symbols[self.entry_point]
+ self.output.header.e_entry = self.global_symbols[self.entry_point]
# DONE !
# GNU Lesser General Public License as published by the Free Software
# Foundation, version 2.1.
-#from bold.constants import *
-#from bold.elf import Elf64, Elf64_Phdr, TextSegment, DataSegment, Dynamic, Interpreter
-
__author__ = "Amand Tihon <amand.tihon@alrj.org>"
__version__ = "0.0.1"
version=self._version_message, description=self._description_message,
add_help_option=True, prog="bold")
- self.set_defaults(entry="_start", outfile="a.out")
+ self.set_defaults(entry="_start", outfile="a.out", raw=False, ccall=False)
self.add_option("-e", "--entry", action="store", dest="entry",
metavar="SYMBOL", help="Set the entry point (default: _start)")
+
self.add_option("-l", "--library", action="append", dest="shlibs",
metavar="LIBNAME", help="Search for library LIBNAME")
+
self.add_option("-o", "--output", action="store", dest="outfile",
metavar="FILE", help="Set output file name (default: a.out)")
+ self.add_option("--raw", action="store_true", dest="raw",
+ help="Don't include the symbol resolution code (default: include it)")
+
+ self.add_option("-c", "--ccall", action="store_true", dest="ccall",
+ help="Make external symbol callable by C (default: no)")
+
def main():
parser = BoldOptionParser()
options, args = parser.parse_args()
- linker = BoldLinker()
-
- if options.shlibs:
- for shlib in options.shlibs:
- try:
- linker.add_shlib(shlib)
- except LibNotFound, e:
- print >>sys.stderr, e
- return 1
-
if not args:
print >>sys.stderr, "No input files"
return 1
+ linker = BoldLinker()
+
for infile in args:
try:
linker.add_object(infile)
print >>sys.stderr, e
return 1
+ if options.ccall and options.raw:
+ # ccall implies that we include the symbol resolution code...
+ print >>sys.stderr, "Including symbol resolution code because of -c."
+ options.raw = False
+
+ if not options.raw:
+     # Object file holding the symbol resolution (import by hash) code.
+     ibh_object = 'bold_ibh-x86_64.o'
+     for d in ['data', '/usr/lib/bold/', '/usr/local/lib/bold', '.']:
+         f = os.path.join(d, ibh_object)
+         try:
+             linker.add_object(f)
+             break
+         except UnsupportedObject, e:
+             # file was found, but is not recognized
+             print >>sys.stderr, e
+             return 1
+         except IOError, e:
+             # not found, try next directory
+             pass
+     else:
+         # No directory had it: report the file name that was actually
+         # searched for (the message used to name boldsymres-x86_64.o).
+         print >>sys.stderr, "Could not find %s." % ibh_object
+         return 1
+
+ if options.shlibs:
+ for shlib in options.shlibs:
+ try:
+ linker.add_shlib(shlib)
+ except LibNotFound, e:
+ print >>sys.stderr, e
+ return 1
+
linker.entry_point = options.entry
try:
+ linker.build_symbols_tables()
+ linker.build_external(with_jump=options.ccall)
+
linker.link()
except UndefinedSymbol, e:
print >>sys.stderr, e
--- /dev/null
+; Bold - Import by hash for linux/amd64 (elf64-x86-64)
+; © 2009 Amand "alrj" Tihon
+; kate: syntax Intel x86 (NASM);
+
+; alrj's x86_64 version of the import by hash method by parapete, las, leblane.
+; See the wonderful thread at http://www.pouet.net/topic.php?which=5392 to
+; learn everything about import by hash on Linux.
+
+; Compile with
+; yasm -f elf64 -o bold_ibh-x86_64.o bold_ibh-x86_64.asm
+; (or replace yasm by nasm)
+
+
+BITS 64
+CPU X64
+
+global _bold__ibh
+global exit
+
+extern _dt_debug ; defined by bold linker
+extern _bold__functions_hash ; in .data, generated by bold
+extern _bold__functions_pointers ; in .bss, generated by bold
+extern _bold__functions_count ; immediate 32 bits
+extern main ; must be declared when using this
+
+
+%define SYS_exit 60
+%define DT_HASH 4
+
+segment .text
+
+_bold__ibh:
+; {{{ Do the RTLD: resolve every imported function by hash, then call main
+ mov rbx, [_dt_debug] ; rbx points to r_debug
+ mov rbx, [rbx + 8] ; rbx points to link_map
+ mov rbx, [rbx + 24] ; skip the first two link_map entries
+ mov rbx, [rbx + 24]
+
+ mov esi, [rel _bold__functions_hash] ; Implicitly zero-extended. NOTE(review): the [rel ...] form loads the dword stored at the symbol, while lodsd below reads through rsi - confirm the address itself was not intended
+ mov edi, [rel _bold__functions_pointers] ; ditto (stosq below writes through rdi)
+ mov ecx, _bold__functions_count ; loop counter for .symbol_loop
+
+ ; Load all the symbols
+ .symbol_loop:
+ lodsd ; Load symbol hash in eax
+ push rsi ; rsi and rcx are reused by the search below
+ push rcx
+
+; {{{ For each hash
+ mov r15d, eax ; Save function hash
+ mov r13, rbx ; copy link_map's pseudo-head
+
+ ; Iterate over libraries found in link_map
+ .libloop:
+ mov rdx, [r13 + 16] ; link_map->l_ld
+
+ ; Find the interesting entries in the DYNAMIC table.
+ .dynamic_loop:
+ xor eax, eax ; enough because hash was 32 bits
+
+ mov al, DT_HASH ; DT_HASH == 4
+ cmp [rdx], rax
+ cmove r9, [rdx+8] ; r9 = value of the DT_HASH entry
+
+ inc al ; DT_STRTAB == 5
+ cmp [rdx], rax
+ cmove r10, [rdx+8] ; r10 = value of the DT_STRTAB entry
+
+ inc al ; DT_SYMTAB == 6
+ cmp [rdx], rax
+ cmove r11, [rdx+8] ; r11 = value of the DT_SYMTAB entry
+
+ ; Next dynamic entry
+ add rdx, 16
+ xor al, al
+ cmp [rdx], rax ; an entry whose tag is 0 ends the scan
+ jnz .dynamic_loop
+
+ ; All DYNAMIC entries have been read.
+ mov ecx, [r9 + 4] ; nchain, number of exported symbols
+
+ ; Iterate over the symbols in the library (symtab entries).
+ .symbolloop:
+ ; Find the symbol name in strtab
+ mov esi, [r11] ; st_name, offset in strtab
+ add rsi, r10 ; pointer to symbol name
+
+ ; Compute the hash
+ xor edx, edx ; running hash h = 0
+ .hash_loop: ; over each char
+ xor eax, eax
+ lodsb
+ test al, al ; the terminating NUL ends the name
+ jz .hash_end
+
+ sub eax, edx ; eax = c - h
+ shl edx, 6 ; edx = h << 6
+ add eax, edx ; eax = c - h + (h << 6)
+ shl edx, 10 ; edx = h << 16
+ add edx, eax ; h = c - h + (h << 6) + (h << 16), as in the linker's hash_name()
+ jmp short .hash_loop
+
+ .hash_end:
+ cmp edx, r15d ; Compare with stored hash
+ je .found
+ add r11, 24 ; Next symtab entry
+ loop .symbolloop ; until nchain entries have been tried
+
+ ; Symbol was not found in this library
+ mov r13, [r13 + 24] ; Next link_map entry. NOTE(review): no end-of-list check is visible here - confirm what happens when no library exports the hash
+ jmp short .libloop
+ .found:
+ mov rax, [r11 + 8] ; st_value, offset of the symbol
+ add rax, [r13] ; add link_map->l_addr
+; }}}
+
+ pop rcx
+ pop rsi
+ stosq ; Store function pointer
+ loop .symbol_loop
+; }}}
+
+ ; When all is resolved, call main()
+ call main
+ mov edi, eax ; main's return value is passed on to the exit syscall below
+
+exit:
+ ; Exit cleanly
+ mov eax, SYS_exit
+ syscall