1 # -*- coding: utf-8 -*-
2 # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
4 # Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
6 # This file is part of bold, the Byte Optimized Linker.
8 # You can redistribute this file and/or modify it under the terms of the
9 # GNU General Public License as published by the Free Software Foundation,
10 # either version 3 of the License or (at your option) any later version.
13 from BinArray import BinArray
14 from constants import *
def nested_property(c):
  """Build a property object from a factory function.

  `c` is called without arguments and must return a mapping whose keys are
  keyword arguments accepted by the built-in property() (fget, fset, fdel,
  doc). The resulting property is returned.
  """
  accessors = c()
  return property(**accessors)
23 #--------------------------------------------------------------------------
25 #--------------------------------------------------------------------------
28 """Handles an Elf64 object."""
29 interpreter = "/lib64/ld-linux-x86-64.so.2"
31 def __init__(self, path=None):
33 self.header = Elf64_Ehdr()
34 self.header.owner = self
40 self.local_symbols = {}
41 self.global_symbols = {}
42 self.undefined_symbols = []
# Functions for relocatable files, used as input
50 def fromfile(self, path):
55 data.fromfile(f, Elf64_Ehdr.size)
56 self.header.fromBinArray(data)
58 # This linker only supports relocatable objects
59 if self.header.e_type != ET_REL:
60 raise NotRelocatableObject(path)
62 if self.header.e_ident.ei_class != ELFCLASS64:
63 raise UnsupportedObject(path, "Not %s" % ELFCLASS64)
65 if self.header.e_machine != EM_X86_64:
66 raise UnsupportedObject(path, "Not %s" % EM_X86_64)
68 # Load sections headers
69 f.seek(self.header.e_shoff)
70 for i in range(self.header.e_shnum):
72 data.fromfile(f, self.header.e_shentsize)
73 h = Elf64_Shdr(i, data)
77 # Read sections content
80 if sh.sh_type != SHT_NOBITS:
82 data.fromfile(f, sh.sh_size)
87 def resolve_names(self):
88 # The .shstrtab index is in Elf Header. find the sections names
89 strtab = self.shdrs[self.header.e_shstrndx].content
92 sh.name = strtab[int(sh.sh_name)]
93 self.sections[sh.name] = sh
95 # And resolve names in the section itself
99 def find_symbols(self):
100 for sh in self.shdrs:
101 if sh.sh_type == SHT_SYMTAB:
102 symtab = sh.content.symtab
104 for symbol in symtab:
105 if symbol.st_type == STT_FILE:
107 if symbol.st_shndx == SHN_ABS:
109 if symbol.st_shndx == SHN_UNDEF:
111 self.undefined_symbols.append(symbol.name)
114 target_section = self.shdrs[symbol.st_shndx]
116 symbol_name = symbol.name
117 value = symbol.st_value
118 bind = symbol.st_binding
120 # We got a name, a target section, and an offset in the section
121 if symbol.st_binding == STB_LOCAL:
122 if symbol.st_type == STT_SECTION:
123 symbol_name = target_section.name
124 self.local_symbols[symbol_name] = (target_section, value)
126 self.global_symbols[symbol_name] = (target_section, value)
128 def apply_relocation(self, all_global_symbols):
129 # find relocation tables
130 relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]]
131 for sh in relocations:
132 target = sh.target.content
134 for reloc in sh.content.relatab:
135 if reloc.symbol.st_shndx == SHN_UNDEF:
136 # This is an extern symbol, find it in all_global_symbols
137 sym_address = all_global_symbols[reloc.symbol.name]
139 # source == in which section it is defined
140 source = self.shdrs[reloc.symbol.st_shndx].content
141 sym_address = source.virt_addr + reloc.symbol.st_value
143 target_ba = target.data # The actual BinArray that we'll modify
144 pc_address = target.virt_addr + reloc.r_offset
146 if reloc.r_type == R_X86_64_64:
147 format = "<Q" # Direct 64 bit address
148 target_value = sym_address + reloc.r_addend
149 elif reloc.r_type == R_X86_64_PC32:
150 format = "<i" # PC relative 32 bit signed
151 target_value = sym_address + reloc.r_addend - pc_address
152 elif reloc.r_type == R_X86_64_32:
153 format = "<I" # Direct 32 bit zero extended
154 target_value = sym_address + reloc.r_addend
155 elif reloc.r_type == R_X86_64_32S:
156 format = "<i" # Direct 32 bit sign extended
157 target_value = sym_address + reloc.r_addend
158 elif reloc.r_type == R_X86_64_PC16:
159 format = "<h" # 16 bit sign extended pc relative
160 target_value = sym_address + reloc.r_addend - pc_address
161 elif reloc.r_type == R_X86_64_16:
162 format = "<H" # Direct 16 bit zero extended
163 target_value = sym_address + reloc.r_addend
164 elif reloc.r_type == R_X86_64_PC8:
165 format = "b" # 8 bit sign extended pc relative
166 target_value = sym_address + reloc.r_addend - pc_address
167 elif reloc.r_type == R_X86_64_8:
168 format = "b" # Direct 8 bit sign extended
169 target_value = sym_address + reloc.r_addend
171 print "Unsupported relocation type: %s" % reloc.r_type
174 d = BinArray(struct.pack(format, target_value))
175 start = reloc.r_offset
177 target_ba[start:end] = d
# Functions for executable files, used as output
def add_phdr(self, phdr):
  """Append `phdr` to self.phdrs and refresh the header's entry count.

  After the append, self.header.e_phnum is set to the new length of
  self.phdrs.
  """
  table = self.phdrs
  table.append(phdr)
  self.header.e_phnum = len(table)
def add_segment(self, segment):
  """Append `segment` at the end of self.segments."""
  registered = self.segments
  registered.append(segment)
190 def layout(self, base_vaddr):
191 """Do the actual layout for final executable."""
193 virt_addr = base_vaddr
195 self.virt_addr = base_vaddr
196 self.file_offset = file_offset
197 for s in self.segments:
199 s.virt_addr = virt_addr
200 s.file_offset = file_offset
202 virt_addr += s.logical_size
203 file_offset += s.physical_size
205 def toBinArray(self):
207 for s in self.segments:
208 ba.extend(s.toBinArray())
212 #--------------------------------------------------------------------------
214 #--------------------------------------------------------------------------
216 class Elf64_eident(object):
217 """Detailed representation for the Elf identifier."""
219 size = struct.calcsize(format)
def __init__(self, rawdata=None):
  """Create the identifier, parsing `rawdata` when one is supplied.

  When `rawdata` is None, no field is set by this constructor.
  """
  object.__init__(self)
  has_raw = rawdata is not None
  if has_raw:
    self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Populate the identifier fields from `rawdata`.

  The magic is the first four items of `rawdata`; items 4 to 7 are wrapped
  in ElfClass, ElfData, ElfVersion and ElfOsAbi respectively.
  ei_abiversion and ei_pad are not read from `rawdata`: they are always
  reset to zero.
  """
  # The unpacked tuple is not used below; the call itself is kept unchanged.
  _unpacked = struct.unpack(self.format, rawdata)
  self.ei_magic = rawdata[:4]
  wrappers = (
    ("ei_class", ElfClass),
    ("ei_data", ElfData),
    ("ei_version", ElfVersion),
    ("ei_osabi", ElfOsAbi),
  )
  # The wrapped fields start right after the four magic items.
  for position, (field, wrapper) in enumerate(wrappers, 4):
    setattr(self, field, wrapper(rawdata[position]))
  self.ei_abiversion = 0
  self.ei_pad = [0] * 7
def make_default_amd64(self):
  """Set every identifier field to the values used for an AMD64 object.

  The magic is the four bytes 0x7f 'E' 'L' 'F'; class, data encoding,
  version and OS ABI are ELFCLASS64, ELFDATA2LSB, EV_CURRENT and
  ELFOSABI_SYSV; the ABI version and the seven padding items are zero.
  """
  magic_bytes = [0x7f] + [ord(letter) for letter in "ELF"]
  self.ei_magic = BinArray(magic_bytes)
  defaults = (
    ("ei_class", ELFCLASS64),
    ("ei_data", ELFDATA2LSB),
    ("ei_version", EV_CURRENT),
    ("ei_osabi", ELFOSABI_SYSV),
    ("ei_abiversion", 0),
  )
  for field, value in defaults:
    setattr(self, field, value)
  self.ei_pad = [0] * 7
247 def toBinArray(self):
248 ba = BinArray(self.ei_magic)
249 ba.append(self.ei_class)
250 ba.append(self.ei_data)
251 ba.append(self.ei_version)
252 ba.append(self.ei_osabi)
253 ba.append(self.ei_abiversion)
254 ba.extend(self.ei_pad)
258 class Elf64_Ehdr(object):
259 """Elf file header"""
260 format = "<16B 2H I 3Q I 6H"
261 size = struct.calcsize(format)
265 def __init__(self, rawdata=None):
266 object.__init__(self)
267 self.e_ident = Elf64_eident()
268 self.e_type = ET_NONE
269 self.e_machine = EM_X86_64
270 self.e_version = EV_CURRENT
275 self.e_ehsize = self.size
276 self.e_phentsize = Elf64_Phdr.size
278 self.e_shentsize = Elf64_Shdr.size
281 if rawdata is not None:
282 self.fromBinArray(rawdata)
284 def fromBinArray(self, rawdata):
285 t = struct.unpack(self.format, rawdata)
286 self.e_ident = Elf64_eident(BinArray(rawdata[:16]))
287 self.e_type = ElfType(t[16])
288 self.e_machine = ElfMachine(t[17])
289 self.e_version = ElfVersion(t[18])
294 self.e_ehsize = t[23]
295 self.e_phentsize = t[24]
297 self.e_shentsize = t[26]
299 self.e_shstrndx = t[28]
301 def toBinArray(self):
302 # Build a list from e_ident and all other fields, to feed struct.pack.
303 values = self.e_ident.toBinArray().tolist()
304 values.extend([self.e_type, self.e_machine, self.e_version, self.e_entry,
305 self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
306 self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx])
307 res = struct.pack(self.format, *values)
314 #--------------------------------------------------------------------------
316 #--------------------------------------------------------------------------
318 class Elf64_Shdr(object):
319 """Elf64 section header."""
320 format = "<2I 4Q 2I 2Q"
321 size = struct.calcsize(format)
325 def __init__(self, index=None, rawdata=None):
326 object.__init__(self)
328 if rawdata is not None:
329 self.fromBinArray(rawdata)
331 def fromBinArray(self, rawdata):
332 t = struct.unpack(self.format, rawdata)
334 self.sh_type = ElfShType(t[1])
337 self.sh_offset = t[4]
341 self.sh_addralign = t[8]
342 self.sh_entsize = t[9]
def resolve_names(self):
  """Delegate name resolution to this header's content.

  The content's own resolve_names() is called with self.owner as its
  argument.
  """
  section = self.content
  section.resolve_names(self.owner)
351 def fset(self, data):
352 """Use the Section factory to get the subclass corresponding to the
353 session type specified in this header)."""
354 self._content = Section(self, data)
357 # For sections that contain elements of specific types :
359 class Elf64_Sym(object):
360 """Symbol Table entry"""
361 format = "<I 2B H 2Q "
362 entsize = struct.calcsize(format)
def __init__(self, rawdata=None):
  """Create a symbol entry; decode `rawdata` if it is provided.

  With `rawdata` left at None, the constructor sets no symbol field.
  """
  object.__init__(self)
  if not (rawdata is None):
    self.fromBinArray(rawdata)
371 return ElfSymbolBinding((self.st_info >> 4) & 0x0f)
372 def fset(self, value):
373 self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f))
379 return ElfSymbolType(self.st_info & 0x0f)
380 def fset(self, value):
381 self.st_info = ((self.st_info & 0xf0) | (value & 0x0f))
387 return ElfSymbolVisibility(self.st_other & 0x03)
388 def fset(self, value):
389 self.st_other = ((self.st_other & 0xfc) | (value & 0x03))
392 def fromBinArray(self, rawdata):
393 t = struct.unpack(self.format, rawdata)
394 self.st_name = t[0] # index in the strtab pointed by sh_link
397 self.st_shndx = ElfSectionIndex(t[3])
402 class Elf64_Rel(object):
def __init__(self, rawdata=None):
  """Create a relocation entry; decode `rawdata` if it is provided.

  r_addend is always initialised to 0 before any decoding takes place.
  """
  object.__init__(self)
  # A plain Rel entry has no addend, so it starts out as zero.
  self.r_addend = 0
  wants_decoding = rawdata is not None
  if wants_decoding:
    self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Unpack `rawdata` according to self.format.

  struct.unpack raises struct.error when `rawdata` does not have the size
  the format requires.
  """
  # The receiver must be named `self`: it was spelled `sef`, so the
  # reference to `self` below raised NameError on every call.
  # NOTE(review): only the unpack step is visible here - confirm that the
  # unpacked fields are stored on the instance.
  t = struct.unpack(self.format, rawdata)
418 return (self.r_info >> 32) & 0xffffffff
419 def fset(self, value):
420 self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff)
426 return Amd64Relocation(self.r_info & 0xffffffff)
427 def fset(self, value):
428 self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff)
432 class Elf64_Rela(Elf64_Rel):
def __init__(self, rawdata=None):
  """Initialise through Elf64_Rel.__init__, forwarding `rawdata` unchanged."""
  Elf64_Rel.__init__(self, rawdata=rawdata)
437 def fromBinArray(self, rawdata):
438 t = struct.unpack(self.format, rawdata)
444 class Elf64_Dyn(object):
446 size = struct.calcsize(format)
447 def __init__(self, tag, value):
448 object.__init__(self)
456 def fset(self, value):
460 def toBinArray(self):
462 ba.fromstring(struct.pack(self.format, self.d_tag, self.d_val))
467 def Section(shdr, data=None):
468 """A section factory"""
471 SHT_PROGBITS: SProgBits,
476 SHT_DYNAMIC: SDynamic,
483 if shdr.sh_type in dataclass:
484 return dataclass[shdr.sh_type](shdr, data)
486 return BaseSection(shdr, data)
489 class BaseSection(object):
490 def __init__(self, shdr, rawdata=None):
491 object.__init__(self)
494 if rawdata is not None:
495 self.fromBinArray(rawdata)
497 def fromBinArray(self, rawdata):
500 def toBinArray(self):
def resolve_names(self, elf):
  """Do nothing: there is nothing to resolve for this section."""
513 return len(self.data)
522 class SNull(BaseSection):
def __init__(self, shdr, data=None):
  """Initialise the base section for `shdr` without any raw data.

  `data` is accepted but ignored: None is always handed to
  BaseSection.__init__ in its place.
  """
  BaseSection.__init__(self, shdr, rawdata=None)
527 class SProgBits(BaseSection):
def __init__(self, shdr, data=None):
  """Initialise the base section with `shdr` and `data` as its raw data."""
  BaseSection.__init__(self, shdr, rawdata=data)
532 class SSymtab(BaseSection):
533 entsize = struct.calcsize(Elf64_Sym.format)
534 def __init__(self, shdr, data=None):
536 BaseSection.__init__(self, shdr, data)
def fromBinArray(self, data):
  """Load the raw data and split it into Elf64_Sym entries.

  `data` is first handed to BaseSection.fromBinArray(). It is then cut
  into consecutive slices of self.entsize items, and each slice becomes an
  Elf64_Sym appended to self.symtab. Trailing items that do not fill a
  whole entry are ignored.
  """
  BaseSection.fromBinArray(self, data)
  entsize = self.entsize
  # Floor division keeps the entry count an integer even when `/` means
  # true division; range() rejects a float count.
  for index in range(len(data) // entsize):
    start = index * entsize
    self.symtab.append(Elf64_Sym(data[start:start + entsize]))
def resolve_names(self, elf):
  """Give every symbol of this table its `name` attribute.

  The string table is the content of the section header found at index
  self.header.sh_link in elf.shdrs; each symbol's st_name is the key
  looked up in that table.
  """
  headers = elf.shdrs
  names = headers[self.header.sh_link].content
  for entry in self.symtab:
    entry.name = names[entry.st_name]
def __getitem__(self, key):
  """Return the entry of self.symtab stored under `key`."""
  entries = self.symtab
  return entries[key]
557 class SStrtab(BaseSection):
558 """This one behaves in two completely different ways.
559 If it's given a section header and data, it will act as read-only, only to
560 be used for name resolution.
561 If it's not given any argument, it can be used to create a new Strtab."""
562 def __init__(self, shdr=None, data=None):
563 self.readonly = (shdr is not None)
567 BaseSection.__init__(self, shdr, data)
568 self.virt_addr = None
570 def toBinArray(self):
572 return BaseSection.toBinArray()
575 keys = self.by_index.keys()
578 ba.fromstring(self.by_index[k] + "\0")
584 if len(self.by_index) == 0:
586 return len(self.data)
592 return self.by_index.iteritems()
594 # Resolution functions
596 def fromBinArray(self, data):
597 BaseSection.fromBinArray(self, data)
598 itab = data.tostring().split('\0')
601 self.by_index[i] = sname
602 self.by_name[sname] = i
605 def __getitem__(self, key):
606 if isinstance(key, int):
607 # Find string by index
608 if key in self.by_index:
609 # Already computed, return it
610 return self.by_index[key]
612 # It references a substring
613 v = self.data[key:].tostring().split('\0')[0]
614 self.by_index[key] = v
615 self.by_name[v] = key
619 if key in self.by_name:
620 return self.by_name[key]
624 # Executable creation functions
626 def append(self, identifier):
627 if len(self.by_name) == 0:
630 last = max(self.by_index.keys())
631 offset = last + len(self.by_index[last]) + 1 # for the \0
632 self.by_index[offset] = identifier
633 self.by_name[identifier] = offset
634 self.data = self.toBinArray()
641 class SRela(BaseSection):
642 entsize = struct.calcsize(Elf64_Rela.format)
643 def __init__(self, shdr, data=None):
645 BaseSection.__init__(self, shdr, data)
def fromBinArray(self, data):
  """Load the raw data and split it into Elf64_Rela entries.

  `data` is first handed to BaseSection.fromBinArray(). It is then cut
  into consecutive slices of self.entsize items, and each slice becomes an
  Elf64_Rela appended to self.relatab. Trailing items that do not fill a
  whole entry are ignored.
  """
  BaseSection.fromBinArray(self, data)
  entsize = self.entsize
  # Floor division keeps the entry count an integer even when `/` means
  # true division; range() rejects a float count.
  for index in range(len(data) // entsize):
    start = index * entsize
    self.relatab.append(Elf64_Rela(data[start:start + entsize]))
def resolve_names(self, elf):
  """Link this relocation table to its symbol table and target section.

  Despite its name, this resolves references rather than names:
  self.symtab becomes the content of the header at index
  self.header.sh_link in elf.shdrs, self.header.target becomes the header
  at index self.header.sh_info, and every relocation gets a `symbol`
  attribute taken from self.symtab at index r_sym.
  """
  headers = elf.shdrs
  header = self.header
  # sh_link designates the symbol table used by these relocations.
  self.symtab = headers[header.sh_link].content
  # sh_info designates the section the relocations apply to.
  header.target = headers[header.sh_info]
  symbols = self.symtab
  for reloc in self.relatab:
    reloc.symbol = symbols[reloc.r_sym]
666 class SHash(BaseSection):
670 class SDynamic(BaseSection):
674 class SNote(BaseSection):
678 class SNobits(BaseSection):
685 return self.header.sh_size
688 def toBinArray(self):
692 class SRel(BaseSection):
696 class SShlib(BaseSection):
700 class SDynsym(SSymtab):
704 class Elf64_Phdr(object):
706 size = struct.calcsize(format)
711 object.__init__(self)
712 self.p_type = PT_NULL
713 self.p_flags = PF_X + PF_W + PF_R
721 def toBinArray(self):
722 res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset,
723 self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align)
def update_from_content(self, content):
  """Copy offset, address and sizes from `content` into this header.

  p_offset, p_vaddr, p_filesz and p_memsz are taken from the content's
  file_offset, virt_addr, physical_size and logical_size respectively.
  `content` must already carry those four attributes; per the original
  note they are known once layout() has been applied.
  """
  mapping = (
    ("p_offset", "file_offset"),
    ("p_vaddr", "virt_addr"),
    ("p_filesz", "physical_size"),
    ("p_memsz", "logical_size"),
  )
  for target, source in mapping:
    setattr(self, target, getattr(content, source))
738 class BaseSegment(object):
739 def __init__(self, align=0):
740 object.__init__(self)
def add_content(self, content):
  """Append `content` at the end of self.content."""
  items = self.content
  items.append(content)
747 def toBinArray(self):
749 for c in self.content:
750 ba.extend(c.toBinArray())
756 return sum(c.size for c in self.content)
762 class TextSegment(BaseSegment):
def __init__(self, align=0):
  """Initialise through BaseSegment.__init__, forwarding `align`."""
  BaseSegment.__init__(self, align=align)
767 virt_addr = self.virt_addr
768 file_offset = self.file_offset
769 for i in self.content:
770 i.virt_addr = virt_addr
771 i.file_offset = file_offset
773 virt_addr += i.logical_size
774 file_offset += i.physical_size
777 class DataSegment(BaseSegment):
778 def __init__(self, align=0):
779 BaseSegment.__init__(self, align)
def add_nobits(self, content):
  """Append `content` at the end of self.nobits."""
  pending = self.nobits
  pending.append(content)
786 virt_addr = self.virt_addr
787 file_offset = self.file_offset
788 for i in self.content:
789 i.virt_addr = virt_addr
790 i.file_offset = file_offset
792 virt_addr += i.logical_size
793 file_offset += i.physical_size
794 for i in self.nobits:
795 i.virt_addr = virt_addr
798 virt_addr += i.logical_size
803 return self.physical_size + sum(c.logical_size for c in self.nobits)
807 class Dynamic(object):
809 object.__init__(self)
811 self.strtab = SStrtab()
816 # End the table with a DT_NULL without associated value.
817 return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q"))
def add_shlib(self, shlib):
  """Record `shlib` as a needed library in the dynamic table.

  `shlib` is appended to self.strtab; the value returned by that append is
  stored in a new DT_NEEDED entry added to self.dyntab.
  """
  name_offset = self.strtab.append(shlib)
  needed = Elf64_Dyn(DT_NEEDED, name_offset)
  self.dyntab.append(needed)
def add_symtab(self, vaddr):
  """Add a DT_SYMTAB entry carrying `vaddr` to self.dyntab."""
  entry = Elf64_Dyn(DT_SYMTAB, vaddr)
  self.dyntab.append(entry)
830 self.dyntab.append(Elf64_Dyn(DT_DEBUG, 0))
833 # Adjust the address of the strtab, if
834 if self.strtab.virt_addr is None:
835 print "Ooops, strtab's address is not known yet. Aborting."
838 self.dyntab.append(Elf64_Dyn(DT_STRTAB, self.strtab.virt_addr))
841 def dt_debug_address():
843 for i, d in enumerate(self.dyntab):
844 if d.d_tag == DT_DEBUG:
845 return self.virt_addr + (i*d.size + (d.size/2))
849 def toBinArray(self):
851 for d in self.dyntab:
852 ba.extend(d.toBinArray())
853 null = struct.pack("<Q", DT_NULL)
858 class Interpreter(object):
860 Pseudo-section containing the null terminated string referencing the
863 @ivar size: Read-only attribute, size of the null terminated string.
864 @ivar logical_size: alias to size
865 @ivar physical_size: alias to size
867 default_interpreter = "/lib64/ld-linux-x86-64.so.2"
869 def __init__(self, interpreter=None):
871 @param interpreter: The interpreter ot use. Defaults to
872 "/lib64/ld-linux-x86-64.so.2", as per the specs.
873 @type interpreter: string
875 object.__init__(self)
877 self.interpreter = interpreter
879 self.interpreter = self.default_interpreter
885 return len(self.interpreter) + 1
890 def toBinArray(self):
892 @return: a L{BinArray} with the content of the pseudo-section.
894 ba = BinArray(self.interpreter)