1 # -*- coding: utf-8 -*-
3 # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
5 # Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
7 # This file is part of bold, the Byte Optimized Linker.
9 # You can redistribute this file and/or modify it under the terms of the
10 # GNU Lesser General Public License as published by the Free Software
11 # Foundation, version 2.1.
14 from BinArray import BinArray
15 from constants import *
def nested_property(c):
  """Build a property from a function that returns property() arguments.

  `c` is called with no arguments and must return a mapping; that mapping
  is expanded into keyword arguments for the built-in property().
  """
  accessors = c()
  return property(**accessors)
23 #--------------------------------------------------------------------------
25 #--------------------------------------------------------------------------
28 """Handles an Elf64 object."""
29 interpreter = "/lib64/ld-linux-x86-64.so.2"
31 def __init__(self, path=None):
33 self.header = Elf64_Ehdr()
34 self.header.owner = self
40 self.local_symbols = {}
41 self.global_symbols = {}
42 self.undefined_symbols = []
# Functions for relocatable files, used as input
49 def fromfile(self, path):
54 data.fromfile(f, Elf64_Ehdr.size)
55 self.header.fromBinArray(data)
57 # This linker only supports relocatable objects
58 if self.header.e_type != ET_REL:
59 raise NotRelocatableObject(path)
61 if self.header.e_ident.ei_class != ELFCLASS64:
62 raise UnsupportedObject(path, "Not %s" % ELFCLASS64)
64 if self.header.e_machine != EM_X86_64:
65 raise UnsupportedObject(path, "Not %s" % EM_X86_64)
67 # Load sections headers
68 f.seek(self.header.e_shoff)
69 for i in range(self.header.e_shnum):
71 data.fromfile(f, self.header.e_shentsize)
72 h = Elf64_Shdr(i, data)
76 # Read sections content
79 if sh.sh_type != SHT_NOBITS:
81 data.fromfile(f, sh.sh_size)
86 def resolve_names(self):
87 # The .shstrtab index is in Elf Header. find the sections names
88 strtab = self.shdrs[self.header.e_shstrndx].content
91 sh.name = strtab[int(sh.sh_name)]
92 self.sections[sh.name] = sh
94 # And resolve names in the section itself
98 def find_symbols(self):
100 if sh.sh_type == SHT_SYMTAB:
101 symtab = sh.content.symtab
103 for symbol in symtab:
104 if symbol.st_type == STT_FILE:
106 if symbol.st_shndx == SHN_ABS:
108 if symbol.st_shndx == SHN_UNDEF:
110 self.undefined_symbols.append(symbol.name)
113 target_section = self.shdrs[symbol.st_shndx]
115 symbol_name = symbol.name
116 value = symbol.st_value
117 bind = symbol.st_binding
119 # We got a name, a target section, and an offset in the section
120 if symbol.st_binding == STB_LOCAL:
121 if symbol.st_type == STT_SECTION:
122 symbol_name = target_section.name
123 self.local_symbols[symbol_name] = (target_section, value)
125 self.global_symbols[symbol_name] = (target_section, value)
127 def apply_relocation(self, all_global_symbols):
128 # find relocation tables
129 relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]]
130 for sh in relocations:
131 target = sh.target.content
133 for reloc in sh.content.relatab:
135 if reloc.symbol.st_shndx == SHN_UNDEF:
136 # This is an extern symbol, find it in all_global_symbols
137 sym_address = all_global_symbols[reloc.symbol.name]
139 # source == in which section it is defined
140 source = self.shdrs[reloc.symbol.st_shndx].content
141 sym_address = source.virt_addr + reloc.symbol.st_value
143 target_ba = target.data # The actual BinArray that we'll modify
144 pc_address = target.virt_addr + reloc.r_offset
146 if reloc.r_type == R_X86_64_64:
147 format = "<Q" # Direct 64 bit address
148 target_value = sym_address + reloc.r_addend
149 elif reloc.r_type == R_X86_64_PC32:
150 format = "<i" # PC relative 32 bit signed
151 target_value = sym_address + reloc.r_addend - pc_address
152 elif reloc.r_type == R_X86_64_32:
153 format = "<I" # Direct 32 bit zero extended
154 target_value = sym_address + reloc.r_addend
155 elif reloc.r_type == R_X86_64_PC16:
156 format = "<h" # 16 bit sign extended pc relative
157 target_value = sym_address + reloc.r_addend - pc_address
158 elif reloc.r_type == R_X86_64_16:
159 format = "<H" # Direct 16 bit zero extended
160 target_value = sym_address + reloc.r_addend
161 elif reloc.r_type == R_X86_64_PC8:
162 format = "b" # 8 bit sign extended pc relative
163 target_value = sym_address + reloc.r_addend - pc_address
164 elif reloc.r_type == R_X86_64_8:
165 format = "b" # Direct 8 bit sign extended
166 target_value = sym_address + reloc.r_addend
168 print "Unsupported relocation type: %s" % reloc.r_type
171 d = BinArray(struct.pack(format, target_value))
172 start = reloc.r_offset
174 target_ba[start:end] = d
# Functions for executable files, as output
def add_phdr(self, phdr):
  """Append a program header and keep the ELF header's count in sync."""
  headers = self.phdrs
  headers.append(phdr)
  # e_phnum always mirrors the number of registered program headers.
  self.header.e_phnum = len(headers)
def add_segment(self, segment):
  """Append `segment` to the list of segments held by this object."""
  known_segments = self.segments
  known_segments.append(segment)
187 def layout(self, base_vaddr):
188 """Do the actual layout for final executable."""
190 virt_addr = base_vaddr
192 self.virt_addr = base_vaddr
193 self.file_offset = file_offset
194 for s in self.segments:
196 s.virt_addr = virt_addr
197 s.file_offset = file_offset
199 virt_addr += s.logical_size
200 file_offset += s.physical_size
202 def toBinArray(self):
204 for s in self.segments:
205 ba.extend(s.toBinArray())
209 #--------------------------------------------------------------------------
211 #--------------------------------------------------------------------------
213 class Elf64_eident(object):
214 """Detailed representation for the Elf identifier."""
216 size = struct.calcsize(format)
220 def __init__(self, rawdata=None):
221 object.__init__(self)
222 if rawdata is not None:
223 self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Fill the identifier fields from the raw identifier bytes.

  The magic is the first four elements of `rawdata`; elements 4 to 7 are
  wrapped as class, data encoding, version and OS ABI. The ABI version
  and the padding are set to zero whatever `rawdata` contains.
  """
  # The unpacked tuple is not used; the call is kept because struct raises
  # when rawdata does not have the size required by the format.
  struct.unpack(self.format, rawdata)
  self.ei_magic = rawdata[:4]
  wrapped_fields = (
    ("ei_class", ElfClass, 4),
    ("ei_data", ElfData, 5),
    ("ei_version", ElfVersion, 6),
    ("ei_osabi", ElfOsAbi, 7),
  )
  for attribute, wrapper, position in wrapped_fields:
    setattr(self, attribute, wrapper(rawdata[position]))
  self.ei_abiversion = 0
  self.ei_pad = [0] * 7
def make_default_amd64(self):
  """Set every identifier field to the defaults used for amd64 output.

  Uses ELFCLASS64, ELFDATA2LSB, EV_CURRENT and ELFOSABI_SYSV, with a zero
  ABI version and seven zero padding bytes.
  """
  # 0x7f followed by the ASCII codes of "ELF".
  self.ei_magic = BinArray([0x7f, 0x45, 0x4c, 0x46])
  self.ei_class, self.ei_data = ELFCLASS64, ELFDATA2LSB
  self.ei_version, self.ei_osabi = EV_CURRENT, ELFOSABI_SYSV
  self.ei_abiversion = 0
  self.ei_pad = [0] * 7
244 def toBinArray(self):
245 ba = BinArray(self.ei_magic)
246 ba.append(self.ei_class)
247 ba.append(self.ei_data)
248 ba.append(self.ei_version)
249 ba.append(self.ei_osabi)
250 ba.append(self.ei_abiversion)
251 ba.extend(self.ei_pad)
255 class Elf64_Ehdr(object):
256 """Elf file header"""
257 format = "<16B 2H I 3Q I 6H"
258 size = struct.calcsize(format)
262 def __init__(self, rawdata=None):
263 object.__init__(self)
264 self.e_ident = Elf64_eident()
265 self.e_type = ET_NONE
266 self.e_machine = EM_X86_64
267 self.e_version = EV_CURRENT
272 self.e_ehsize = self.size
273 self.e_phentsize = Elf64_Phdr.size
275 self.e_shentsize = Elf64_Shdr.size
278 if rawdata is not None:
279 self.fromBinArray(rawdata)
281 def fromBinArray(self, rawdata):
282 t = struct.unpack(self.format, rawdata)
283 self.e_ident = Elf64_eident(BinArray(rawdata[:16]))
284 self.e_type = ElfType(t[16])
285 self.e_machine = ElfMachine(t[17])
286 self.e_version = ElfVersion(t[18])
291 self.e_ehsize = t[23]
292 self.e_phentsize = t[24]
294 self.e_shentsize = t[26]
296 self.e_shstrndx = t[28]
298 def toBinArray(self):
299 # Build a list from e_ident and all other fields, to feed struct.pack.
300 values = self.e_ident.toBinArray().tolist()
301 values.extend([self.e_type, self.e_machine, self.e_version, self.e_entry,
302 self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
303 self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx])
304 res = struct.pack(self.format, *values)
311 #--------------------------------------------------------------------------
313 #--------------------------------------------------------------------------
315 class Elf64_Shdr(object):
316 """Elf64 section header."""
317 format = "<2I 4Q 2I 2Q"
318 size = struct.calcsize(format)
322 def __init__(self, index=None, rawdata=None):
323 object.__init__(self)
325 if rawdata is not None:
326 self.fromBinArray(rawdata)
328 def fromBinArray(self, rawdata):
329 t = struct.unpack(self.format, rawdata)
331 self.sh_type = ElfShType(t[1])
334 self.sh_offset = t[4]
338 self.sh_addralign = t[8]
339 self.sh_entsize = t[9]
def resolve_names(self):
  """Ask this header's content to resolve its names against the owner."""
  section = self.content
  section.resolve_names(self.owner)
348 def fset(self, data):
349 """Use the Section factory to get the subclass corresponding to the
350 session type specified in this header)."""
351 self._content = Section(self, data)
354 # For sections that contain elements of specific types :
356 class Elf64_Sym(object):
357 """Symbol Table entry"""
358 format = "<I 2B H 2Q "
359 entsize = struct.calcsize(format)
360 def __init__(self, rawdata=None):
361 object.__init__(self)
362 if rawdata is not None:
363 self.fromBinArray(rawdata)
368 return ElfSymbolBinding((self.st_info >> 4) & 0x0f)
369 def fset(self, value):
370 self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f))
376 return ElfSymbolType(self.st_info & 0x0f)
377 def fset(self, value):
378 self.st_info = ((self.st_info & 0xf0) | (value & 0x0f))
384 return ElfSymbolVisibility(self.st_other & 0x03)
385 def fset(self, value):
386 self.st_other = ((self.st_other & 0xfc) | (value & 0x03))
389 def fromBinArray(self, rawdata):
390 t = struct.unpack(self.format, rawdata)
391 self.st_name = t[0] # index in the strtab pointed by sh_link
394 self.st_shndx = ElfSectionIndex(t[3])
399 class Elf64_Rel(object):
401 def __init__(self, rawdata=None):
402 object.__init__(self)
403 self.r_addend = 0 # No addend in a Rel.
404 if rawdata is not None:
405 self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Unpack a raw entry according to this class's `format`.

  The receiver must be named `self`: the body reads `self.format`, so the
  previous spelling `sef` made every call fail with a NameError.

  Raises struct.error when `rawdata` does not match the format size.
  """
  t = struct.unpack(self.format, rawdata)
415 return (self.r_info >> 32) & 0xffffffff
416 def fset(self, value):
417 self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff)
423 return Amd64Relocation(self.r_info & 0xffffffff)
424 def fset(self, value):
425 self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff)
429 class Elf64_Rela(Elf64_Rel):
431 def __init__(self, rawdata=None):
432 Elf64_Rel.__init__(self, rawdata)
434 def fromBinArray(self, rawdata):
435 t = struct.unpack(self.format, rawdata)
441 class Elf64_Dyn(object):
443 size = struct.calcsize(format)
444 def __init__(self, tag, value):
445 object.__init__(self)
453 def fset(self, value):
457 def toBinArray(self):
459 ba.fromstring(struct.pack(self.format, self.d_tag, self.d_val))
464 def Section(shdr, data=None):
465 """A section factory"""
468 SHT_PROGBITS: SProgBits,
473 SHT_DYNAMIC: SDynamic,
480 if shdr.sh_type in dataclass:
481 return dataclass[shdr.sh_type](shdr, data)
483 return BaseSection(shdr, data)
486 class BaseSection(object):
487 def __init__(self, shdr, rawdata=None):
488 object.__init__(self)
491 if rawdata is not None:
492 self.fromBinArray(rawdata)
494 def fromBinArray(self, rawdata):
497 def toBinArray(self):
503 def resolve_names(self, elf):
504 """Nothing to resolve."""
510 return len(self.data)
class SNull(BaseSection):
  """Section type whose constructor ignores any data it is given."""
  def __init__(self, shdr, data=None):
    # `data` is accepted for signature compatibility but never forwarded.
    BaseSection.__init__(self, shdr, rawdata=None)
class SProgBits(BaseSection):
  """Section type that hands its data unchanged to BaseSection."""
  def __init__(self, shdr, data=None):
    BaseSection.__init__(self, shdr, rawdata=data)
529 class SSymtab(BaseSection):
530 entsize = struct.calcsize(Elf64_Sym.format)
531 def __init__(self, shdr, data=None):
533 BaseSection.__init__(self, shdr, data)
def fromBinArray(self, data):
  """Load the raw section data and split it into symbol entries.

  The data is first handed to BaseSection.fromBinArray, then cut into
  consecutive slices of `entsize` elements; each slice is turned into an
  Elf64_Sym and appended to `self.symtab`. Trailing elements that do not
  fill a whole entry are ignored.
  """
  BaseSection.fromBinArray(self, data)
  entsize = self.entsize
  # Explicit floor division: plain "/" yields a float under true division
  # (Python 3 or "from __future__ import division") and range() rejects it.
  nument = len(data) // entsize
  for start in range(0, nument * entsize, entsize):
    self.symtab.append(Elf64_Sym(data[start:start + entsize]))
def resolve_names(self, elf):
  """Give every symbol of the table a `name` taken from the string table.

  `elf` is the object whose `shdrs` list holds all section headers.
  """
  # The string table of a symtab is the section designated by sh_link.
  names = elf.shdrs[self.header.sh_link].content
  for entry in self.symtab:
    entry.name = names[entry.st_name]
550 def __getitem__(self, key):
551 return self.symtab[key]
554 class SStrtab(BaseSection):
555 """This one behaves in two completely different ways.
556 If it's given a section header and data, it will act as read-only, only to
557 be used for name resolution.
558 If it's not given any argument, it can be used to create a new Strtab."""
559 def __init__(self, shdr=None, data=None):
560 self.readonly = (shdr is not None)
563 BaseSection.__init__(self, shdr, data)
564 self.virt_addr = None
566 def toBinArray(self):
568 return BaseSection.toBinArray()
571 keys = self.strtab.keys()
574 ba.fromstring(self.strtab[k] + "\0")
582 if len(self.strtab) == 0:
584 return sum((len(x)+1 for x in self.strtab.values()))
590 return self.strtab.iteritems()
592 # Resolution functions
594 def fromBinArray(self, data):
595 BaseSection.fromBinArray(self, data)
596 itab = data.tostring().split('\0')
599 self.strtab[i] = sname
602 def __getitem__(self, key):
603 if key in self.strtab:
604 return self.strtab[key]
606 v = self.data[key:].tostring().split('\0')[0]
610 # Executable creation functions
612 def append(self, string):
613 if len(self.strtab) == 0:
616 last = max(self.strtab.keys())
617 offset = last + len(self.strtab[last]) + 1 # for the \0
618 self.strtab[offset] = string
625 class SRela(BaseSection):
626 entsize = struct.calcsize(Elf64_Rela.format)
627 def __init__(self, shdr, data=None):
629 BaseSection.__init__(self, shdr, data)
def fromBinArray(self, data):
  """Load the raw section data and split it into relocation entries.

  The data is first handed to BaseSection.fromBinArray, then cut into
  consecutive slices of `entsize` elements; each slice is turned into an
  Elf64_Rela and appended to `self.relatab`. Trailing elements that do
  not fill a whole entry are ignored.
  """
  BaseSection.fromBinArray(self, data)
  entsize = self.entsize
  # Explicit floor division: plain "/" yields a float under true division
  # (Python 3 or "from __future__ import division") and range() rejects it.
  nument = len(data) // entsize
  for start in range(0, nument * entsize, entsize):
    self.relatab.append(Elf64_Rela(data[start:start + entsize]))
def resolve_names(self, elf):
  """Bind each relocation entry to its symbol table entry.

  Badly named: nothing is looked up in a string table here. The method
  stores the linked symbol table in `self.symtab`, records the section the
  relocations apply to as `self.header.target`, and sets `symbol` on
  every entry of `self.relatab`.
  """
  sections = elf.shdrs
  header = self.header
  # sh_link leads to the symtab.
  self.symtab = sections[header.sh_link].content
  # sh_info links to the section on which the relocation applies.
  header.target = sections[header.sh_info]
  symbols = self.symtab
  for entry in self.relatab:
    entry.symbol = symbols[entry.r_sym]
650 class SHash(BaseSection):
654 class SDynamic(BaseSection):
658 class SNote(BaseSection):
662 class SNobits(BaseSection):
669 return self.header.sh_size
672 def toBinArray(self):
675 class SRel(BaseSection):
679 class SShlib(BaseSection):
683 class SDynsym(SSymtab):
687 class Elf64_Phdr(object):
689 size = struct.calcsize(format)
694 object.__init__(self)
695 self.p_type = PT_NULL
696 self.p_flags = PF_X + PF_W + PF_R
704 def toBinArray(self):
705 res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset,
706 self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align)
def update_from_content(self, content):
  """Update offset, address and sizes from `content`.

  Once layout() has been applied, the content knows all these values.
  """
  copied_fields = (
    ("p_offset", "file_offset"),
    ("p_vaddr", "virt_addr"),
    ("p_filesz", "physical_size"),
    ("p_memsz", "logical_size"),
  )
  for header_field, content_field in copied_fields:
    setattr(self, header_field, getattr(content, content_field))
721 class BaseSegment(object):
722 def __init__(self, align=0):
723 object.__init__(self)
def add_content(self, content):
  """Append `content` to the list of items held by this segment."""
  items = self.content
  items.append(content)
730 def toBinArray(self):
732 for c in self.content:
733 ba.extend(c.toBinArray())
739 return sum(c.size for c in self.content)
745 class TextSegment(BaseSegment):
746 def __init__(self, align=0):
747 BaseSegment.__init__(self, align)
750 virt_addr = self.virt_addr
751 file_offset = self.file_offset
752 for i in self.content:
753 i.virt_addr = virt_addr
754 i.file_offset = file_offset
756 virt_addr += i.logical_size
757 file_offset += i.physical_size
760 class DataSegment(BaseSegment):
761 def __init__(self, align=0):
762 BaseSegment.__init__(self, align)
def add_nobits(self, content):
  """Append `content` to this segment's separate `nobits` list."""
  pending = self.nobits
  pending.append(content)
769 virt_addr = self.virt_addr
770 file_offset = self.file_offset
771 for i in self.content:
772 i.virt_addr = virt_addr
773 i.file_offset = file_offset
775 virt_addr += i.logical_size
776 file_offset += i.physical_size
777 for i in self.nobits:
778 i.virt_addr = virt_addr
781 virt_addr += i.logical_size
786 return self.physical_size + sum(c.logical_size for c in self.nobits)
790 class Dynamic(object):
792 object.__init__(self)
794 self.strtab = SStrtab()
799 # End the table with a DT_NULL without associated value.
800 return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q"))
def add_shlib(self, shlib):
  """Record a dependency on the shared library named `shlib`.

  The name is appended to the string table, and a DT_NEEDED entry carrying
  the value returned by that append is added to the dynamic table.
  """
  name_offset = self.strtab.append(shlib)
  needed = Elf64_Dyn(DT_NEEDED, name_offset)
  self.dyntab.append(needed)
def add_symtab(self, vaddr):
  """Add a DT_SYMTAB entry whose value is `vaddr` to the dynamic table."""
  entry = Elf64_Dyn(DT_SYMTAB, vaddr)
  self.dyntab.append(entry)
813 self.dyntab.append(Elf64_Dyn(DT_DEBUG, 0))
816 # Adjust the address of the strtab, if
817 if self.strtab.virt_addr is None:
818 print "Ooops, strtab's address is not known yet. Aborting."
821 self.dyntab.append(Elf64_Dyn(DT_STRTAB, self.strtab.virt_addr))
824 def dt_debug_address():
826 for i, d in enumerate(self.dyntab):
827 if d.d_tag == DT_DEBUG:
828 return self.virt_addr + (i*d.size + (d.size/2))
832 def toBinArray(self):
834 for d in self.dyntab:
835 ba.extend(d.toBinArray())
836 null = struct.pack("<Q", DT_NULL)
841 class Interpreter(object):
842 default_interpreter = "/lib64/ld-linux-x86-64.so.2"
844 def __init__(self, interpreter=None):
845 object.__init__(self)
847 self.interpreter = interpreter
849 self.interpreter = self.default_interpreter
855 return len(self.interpreter) + 1
860 def toBinArray(self):
861 ba = BinArray(self.interpreter)