1 # -*- coding: utf-8 -*-
3 # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
5 # Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
7 # This file is part of bold, the Byte Optimized Linker.
9 # You can redistribute this file and/or modify it under the terms of the
10 # GNU Lesser General Public License as published by the Free Software
11 # Foundation, version 2.1.
14 from BinArray import BinArray
15 from constants import *
def nested_property(c):
  """Build a property from an accessor factory.

  `c` is called with no argument and must return a mapping whose items are
  valid keyword arguments for the built-in property() (fget, fset, fdel,
  doc); a typical factory defines its accessors locally and returns
  locals().
  """
  accessors = c()
  return property(**accessors)
23 #--------------------------------------------------------------------------
25 #--------------------------------------------------------------------------
28 """Handles an Elf64 object."""
29 interpreter = "/lib64/ld-linux-x86-64.so.2"
# Constructor: creates the ELF header object and the containers used for
# symbol bookkeeping. `path` defaults to None.
31 def __init__(self, path=None):
# The header keeps a back-reference to this object through `owner`.
33 self.header = Elf64_Ehdr()
34 self.header.owner = self
# Filled in by find_symbols(): the two dicts map a symbol name to a
# (section header, value) tuple, the list collects undefined symbol names.
40 self.local_symbols = {}
41 self.global_symbols = {}
42 self.undefined_symbols = []
48 # Functions for relocatable files, used as input
# Load a relocatable object from `path`: decode the ELF header, validate
# it, then read the section headers and the section contents.
# Raises NotRelocatableObject when e_type is not ET_REL, and
# UnsupportedObject when the class is not ELFCLASS64 or the machine is not
# EM_X86_64.
50 def fromfile(self, path):
# Elf64_Ehdr.size bytes are read from `f` into `data` and decoded.
55 data.fromfile(f, Elf64_Ehdr.size)
56 self.header.fromBinArray(data)
58 # This linker only supports relocatable objects
59 if self.header.e_type != ET_REL:
60 raise NotRelocatableObject(path)
62 if self.header.e_ident.ei_class != ELFCLASS64:
63 raise UnsupportedObject(path, "Not %s" % ELFCLASS64)
65 if self.header.e_machine != EM_X86_64:
66 raise UnsupportedObject(path, "Not %s" % EM_X86_64)
68 # Load sections headers
# The section header table starts at e_shoff and holds e_shnum entries of
# e_shentsize bytes each; every entry is decoded with its index.
69 f.seek(self.header.e_shoff)
70 for i in range(self.header.e_shnum):
72 data.fromfile(f, self.header.e_shentsize)
73 h = Elf64_Shdr(i, data)
77 # Read sections content
# Only sections whose type is not SHT_NOBITS have sh_size bytes read from
# the file.
80 if sh.sh_type != SHT_NOBITS:
82 data.fromfile(f, sh.sh_size)
# Give every section header its name and index the headers by name in
# self.sections.
87 def resolve_names(self):
88 # The .shstrtab index is in Elf Header. find the sections names
89 strtab = self.shdrs[self.header.e_shstrndx].content
# sh_name is converted to int and used as the key into the string table.
92 sh.name = strtab[int(sh.sh_name)]
93 self.sections[sh.name] = sh
95 # And resolve names in the section itself
# Walk the symbols of every SHT_SYMTAB section and record them in
# self.undefined_symbols, self.local_symbols or self.global_symbols.
99 def find_symbols(self):
100 for sh in self.shdrs:
101 if sh.sh_type == SHT_SYMTAB:
102 symtab = sh.content.symtab
104 for symbol in symtab:
# STT_FILE, SHN_ABS and SHN_UNDEF symbols are tested for first; an
# undefined symbol has its name appended to self.undefined_symbols.
105 if symbol.st_type == STT_FILE:
107 if symbol.st_shndx == SHN_ABS:
109 if symbol.st_shndx == SHN_UNDEF:
111 self.undefined_symbols.append(symbol.name)
# The symbol's section index selects the section header it belongs to.
114 target_section = self.shdrs[symbol.st_shndx]
116 symbol_name = symbol.name
117 value = symbol.st_value
# NOTE(review): `bind` is not used by any of the lines below.
118 bind = symbol.st_binding
120 # We got a name, a target section, and an offset in the section
121 if symbol.st_binding == STB_LOCAL:
# A local STT_SECTION symbol is stored under the name of its section.
122 if symbol.st_type == STT_SECTION:
123 symbol_name = target_section.name
# Entries are (section header, value) tuples keyed by symbol name.
124 self.local_symbols[symbol_name] = (target_section, value)
126 self.global_symbols[symbol_name] = (target_section, value)
# Apply every relocation entry of this object to the content of the section
# it targets. `all_global_symbols` is indexed by symbol name and the value
# found is used directly as the symbol's address.
128 def apply_relocation(self, all_global_symbols):
129 # find relocation tables
130 relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]]
131 for sh in relocations:
# sh.target is the header of the section being patched.
132 target = sh.target.content
134 for reloc in sh.content.relatab:
135 if reloc.symbol.st_shndx == SHN_UNDEF:
136 # This is an extern symbol, find it in all_global_symbols
137 sym_address = all_global_symbols[reloc.symbol.name]
139 # source == in which section it is defined
# Address of a locally defined symbol: virtual address of its section's
# content plus the symbol's value.
140 source = self.shdrs[reloc.symbol.st_shndx].content
141 sym_address = source.virt_addr + reloc.symbol.st_value
143 target_ba = target.data # The actual BinArray that we'll modify
# Address of the patched location, subtracted for the PC-relative types.
144 pc_address = target.virt_addr + reloc.r_offset
# Each supported relocation type selects a struct format for the patched
# field and the value to store there.
# NOTE(review): the local name `format` shadows the built-in of that name.
146 if reloc.r_type == R_X86_64_64:
147 format = "<Q" # Direct 64 bit address
148 target_value = sym_address + reloc.r_addend
149 elif reloc.r_type == R_X86_64_PC32:
150 format = "<i" # PC relative 32 bit signed
151 target_value = sym_address + reloc.r_addend - pc_address
152 elif reloc.r_type == R_X86_64_32:
153 format = "<I" # Direct 32 bit zero extended
154 target_value = sym_address + reloc.r_addend
155 elif reloc.r_type == R_X86_64_PC16:
156 format = "<h" # 16 bit sign extended pc relative
157 target_value = sym_address + reloc.r_addend - pc_address
158 elif reloc.r_type == R_X86_64_16:
159 format = "<H" # Direct 16 bit zero extended
160 target_value = sym_address + reloc.r_addend
161 elif reloc.r_type == R_X86_64_PC8:
162 format = "b" # 8 bit sign extended pc relative
163 target_value = sym_address + reloc.r_addend - pc_address
164 elif reloc.r_type == R_X86_64_8:
165 format = "b" # Direct 8 bit sign extended
166 target_value = sym_address + reloc.r_addend
168 print "Unsupported relocation type: %s" % reloc.r_type
# The packed value overwrites the target bytes starting at r_offset.
171 d = BinArray(struct.pack(format, target_value))
172 start = reloc.r_offset
174 target_ba[start:end] = d
177 # Functions for executable files, as output
def add_phdr(self, phdr):
  """Register a program header.

  `phdr` is appended to self.phdrs and the e_phnum field of the ELF header
  is refreshed so that it always equals the number of registered headers.
  """
  headers = self.phdrs
  headers.append(phdr)
  self.header.e_phnum = len(headers)
def add_segment(self, segment):
  """Append `segment` at the end of self.segments."""
  known_segments = self.segments
  known_segments.append(segment)
# Assign a virtual address and a file offset to this object and to each of
# its segments, in order, starting at `base_vaddr`.
187 def layout(self, base_vaddr):
188 """Do the actual layout for final executable."""
190 virt_addr = base_vaddr
192 self.virt_addr = base_vaddr
193 self.file_offset = file_offset
194 for s in self.segments:
196 s.virt_addr = virt_addr
197 s.file_offset = file_offset
# Addresses advance by the segment's logical size, file offsets by its
# physical size.
199 virt_addr += s.logical_size
200 file_offset += s.physical_size
# Serialise the object: the binary form of every segment is appended to
# `ba` in the order of self.segments.
202 def toBinArray(self):
204 for s in self.segments:
205 ba.extend(s.toBinArray())
209 #--------------------------------------------------------------------------
211 #--------------------------------------------------------------------------
# Elf64_eident holds the fields of the ELF identifier: magic, class, data
# encoding, version, OS ABI, ABI version and padding.
213 class Elf64_eident(object):
214 """Detailed representation for the Elf identifier."""
216 size = struct.calcsize(format)
# When `rawdata` is given it is decoded immediately.
220 def __init__(self, rawdata=None):
221 object.__init__(self)
222 if rawdata is not None:
223 self.fromBinArray(rawdata)
# Decode the identifier from `rawdata`: items 0-3 are kept as the magic,
# items 4 to 7 are wrapped as ElfClass, ElfData, ElfVersion and ElfOsAbi.
# The ABI version and the padding are not taken from the input; they are
# set to zeros.
225 def fromBinArray(self, rawdata):
# `t` is not used below; the unpack still raises struct.error when
# `rawdata` does not match the format.
226 t = struct.unpack(self.format, rawdata)
227 self.ei_magic = rawdata[:4]
228 self.ei_class = ElfClass(rawdata[4])
229 self.ei_data = ElfData(rawdata[5])
230 self.ei_version = ElfVersion(rawdata[6])
231 self.ei_osabi = ElfOsAbi(rawdata[7])
232 self.ei_abiversion = 0
233 self.ei_pad = [0, 0, 0, 0, 0, 0, 0]
# Fill in the identifier of a 64-bit, LSB-encoded, current-version, SysV
# object.
235 def make_default_amd64(self):
# 0x7f followed by the ASCII codes of "ELF".
236 self.ei_magic = BinArray([0x7f, 0x45, 0x4c, 0x46])
237 self.ei_class = ELFCLASS64
238 self.ei_data = ELFDATA2LSB
239 self.ei_version = EV_CURRENT
240 self.ei_osabi = ELFOSABI_SYSV
241 self.ei_abiversion = 0
242 self.ei_pad = [0, 0, 0, 0, 0, 0, 0]
# Serialise the identifier: magic first, then one item per field, then the
# padding.
244 def toBinArray(self):
245 ba = BinArray(self.ei_magic)
246 ba.append(self.ei_class)
247 ba.append(self.ei_data)
248 ba.append(self.ei_version)
249 ba.append(self.ei_osabi)
250 ba.append(self.ei_abiversion)
251 ba.extend(self.ei_pad)
# Elf64_Ehdr models the ELF file header. The struct format is
# little-endian: 16 identifier bytes followed by 2H, I, 3Q, I and 6H
# fields, i.e. 29 unpacked items in total.
255 class Elf64_Ehdr(object):
256 """Elf file header"""
257 format = "<16B 2H I 3Q I 6H"
258 size = struct.calcsize(format)
# Start from default values, then decode `rawdata` when it is given.
262 def __init__(self, rawdata=None):
263 object.__init__(self)
264 self.e_ident = Elf64_eident()
265 self.e_type = ET_NONE
266 self.e_machine = EM_X86_64
267 self.e_version = EV_CURRENT
# Entry sizes are taken from the `size` attribute of the matching classes.
272 self.e_ehsize = self.size
273 self.e_phentsize = Elf64_Phdr.size
275 self.e_shentsize = Elf64_Shdr.size
278 if rawdata is not None:
279 self.fromBinArray(rawdata)
# Decode the header from `rawdata`. Items 0-15 of the unpacked tuple are
# the identifier bytes, so the other fields start at index 16.
281 def fromBinArray(self, rawdata):
282 t = struct.unpack(self.format, rawdata)
# The identifier is decoded separately from the first 16 items of rawdata.
283 self.e_ident = Elf64_eident(BinArray(rawdata[:16]))
284 self.e_type = ElfType(t[16])
285 self.e_machine = ElfMachine(t[17])
286 self.e_version = ElfVersion(t[18])
291 self.e_ehsize = t[23]
292 self.e_phentsize = t[24]
294 self.e_shentsize = t[26]
296 self.e_shstrndx = t[28]
# Pack the header: the identifier's items come first, followed by the other
# fields in the order expected by `format`.
298 def toBinArray(self):
299 # Build a list from e_ident and all other fields, to feed struct.pack.
300 values = self.e_ident.toBinArray().tolist()
301 values.extend([self.e_type, self.e_machine, self.e_version, self.e_entry,
302 self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
303 self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx])
304 res = struct.pack(self.format, *values)
311 #--------------------------------------------------------------------------
313 #--------------------------------------------------------------------------
# Elf64_Shdr models one section header. The struct format is little-endian
# with 2I, 4Q, 2I and 2Q fields, i.e. 10 unpacked items.
315 class Elf64_Shdr(object):
316 """Elf64 section header."""
317 format = "<2I 4Q 2I 2Q"
318 size = struct.calcsize(format)
# `index` and `rawdata` are both optional; `rawdata` is decoded when given.
322 def __init__(self, index=None, rawdata=None):
323 object.__init__(self)
325 if rawdata is not None:
326 self.fromBinArray(rawdata)
# Decode the header fields from `rawdata`; the section type is wrapped as
# ElfShType.
328 def fromBinArray(self, rawdata):
329 t = struct.unpack(self.format, rawdata)
331 self.sh_type = ElfShType(t[1])
334 self.sh_offset = t[4]
338 self.sh_addralign = t[8]
339 self.sh_entsize = t[9]
# Name resolution is delegated to the section content, which receives the
# owner of this header.
341 def resolve_names(self):
342 self.content.resolve_names(self.owner)
# Setter storing the Section() factory result for (this header, data) in
# self._content.
348 def fset(self, data):
349 """Use the Section factory to get the subclass corresponding to the
350 session type specified in this header)."""
351 self._content = Section(self, data)
354 # For sections that contain elements of specific types :
# Elf64_Sym models one symbol table entry. The struct format is
# little-endian: one I, two B, one H and two Q fields.
356 class Elf64_Sym(object):
357 """Symbol Table entry"""
358 format = "<I 2B H 2Q "
359 entsize = struct.calcsize(format)
360 def __init__(self, rawdata=None):
361 object.__init__(self)
362 if rawdata is not None:
363 self.fromBinArray(rawdata)
# Binding accessors: the binding is the high nibble of st_info; the setter
# keeps the low nibble untouched.
368 return ElfSymbolBinding((self.st_info >> 4) & 0x0f)
369 def fset(self, value):
370 self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f))
# Type accessors: the type is the low nibble of st_info; the setter keeps
# the high nibble untouched.
376 return ElfSymbolType(self.st_info & 0x0f)
377 def fset(self, value):
378 self.st_info = ((self.st_info & 0xf0) | (value & 0x0f))
# Visibility accessors: the visibility is the two low bits of st_other; the
# setter keeps the other bits untouched.
384 return ElfSymbolVisibility(self.st_other & 0x03)
385 def fset(self, value):
386 self.st_other = ((self.st_other & 0xfc) | (value & 0x03))
# Decode the entry from `rawdata`; the section index is wrapped as
# ElfSectionIndex.
389 def fromBinArray(self, rawdata):
390 t = struct.unpack(self.format, rawdata)
391 self.st_name = t[0] # index in the strtab pointed by sh_link
394 self.st_shndx = ElfSectionIndex(t[3])
# Elf64_Rel models a relocation entry that carries no addend: r_addend is
# always initialised to 0, and `rawdata` is decoded when it is given.
399 class Elf64_Rel(object):
401 def __init__(self, rawdata=None):
402 object.__init__(self)
403 self.r_addend = 0 # No addend in a Rel.
404 if rawdata is not None:
405 self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Unpack one raw relocation entry according to the class `format`.

  The first parameter used to be misspelled `sef`, so the reference to
  `self.format` below raised NameError on every call.

  @param rawdata: the packed entry; struct.unpack raises struct.error when
                  its length does not match `format`.
  """
  # The unpacked tuple keeps the name `t`.
  t = struct.unpack(self.format, rawdata)
# Accessors for the upper 32 bits of r_info: the getter returns them as a
# plain integer, the setter replaces them and keeps the lower 32 bits.
415 return (self.r_info >> 32) & 0xffffffff
416 def fset(self, value):
417 self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff)
# Accessors for the lower 32 bits of r_info: the getter wraps them as
# Amd64Relocation, the setter replaces them and keeps the upper 32 bits.
423 return Amd64Relocation(self.r_info & 0xffffffff)
424 def fset(self, value):
425 self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff)
# Elf64_Rela specialises Elf64_Rel: construction is delegated to the parent
# and fromBinArray is overridden to unpack with this class's own format.
429 class Elf64_Rela(Elf64_Rel):
431 def __init__(self, rawdata=None):
432 Elf64_Rel.__init__(self, rawdata)
434 def fromBinArray(self, rawdata):
435 t = struct.unpack(self.format, rawdata)
# Elf64_Dyn is one entry of the dynamic table, built from a tag and a
# value.
441 class Elf64_Dyn(object):
443 size = struct.calcsize(format)
444 def __init__(self, tag, value):
445 object.__init__(self)
453 def fset(self, value):
# Serialise the entry by packing d_tag and d_val with the class format.
457 def toBinArray(self):
459 ba.fromstring(struct.pack(self.format, self.d_tag, self.d_val))
# Factory: `dataclass` maps a section type to the class that handles it.
# The class registered for shdr.sh_type is instantiated with (shdr, data);
# a type with no entry falls back to BaseSection.
464 def Section(shdr, data=None):
465 """A section factory"""
468 SHT_PROGBITS: SProgBits,
473 SHT_DYNAMIC: SDynamic,
480 if shdr.sh_type in dataclass:
481 return dataclass[shdr.sh_type](shdr, data)
483 return BaseSection(shdr, data)
# BaseSection is the generic section content and the fallback class of the
# Section() factory.
486 class BaseSection(object):
# `rawdata` is optional and is decoded through fromBinArray when given.
487 def __init__(self, shdr, rawdata=None):
488 object.__init__(self)
491 if rawdata is not None:
492 self.fromBinArray(rawdata)
494 def fromBinArray(self, rawdata):
497 def toBinArray(self):
# The generic section has no names to resolve; subclasses override this.
503 def resolve_names(self, elf):
504 """Nothing to resolve."""
510 return len(self.data)
class SNull(BaseSection):
  """Section that never carries content.

  Whatever `data` is passed in is ignored: the base class is always
  initialised with None as raw data.
  """

  def __init__(self, shdr, data=None):
    super(SNull, self).__init__(shdr, None)
class SProgBits(BaseSection):
  """Section registered for SHT_PROGBITS; adds nothing to BaseSection."""

  def __init__(self, shdr, data=None):
    super(SProgBits, self).__init__(shdr, data)
# SSymtab is a symbol table section: the raw data is split into Elf64_Sym
# entries kept in self.symtab.
529 class SSymtab(BaseSection):
530 entsize = struct.calcsize(Elf64_Sym.format)
531 def __init__(self, shdr, data=None):
533 BaseSection.__init__(self, shdr, data)
# Cut `data` into consecutive chunks of `entsize` items and decode each
# chunk as an Elf64_Sym.
535 def fromBinArray(self, data):
536 BaseSection.fromBinArray(self, data)
# NOTE(review): relies on `/` being integer division (Python 2 semantics);
# range() below needs an int.
537 nument = len(data) / self.entsize
538 for i in range(nument):
539 start = i * self.entsize
540 end = i * self.entsize + self.entsize
541 self.symtab.append(Elf64_Sym(data[start:end]))
# Give every symbol a `name` attribute, looked up by st_name in the string
# table designated by this section's sh_link.
543 def resolve_names(self, elf):
544 # For a symtab, the strtab is indicated by sh_link
545 strtab = elf.shdrs[self.header.sh_link].content
546 # Resolve for all symbols in the table
547 for sym in self.symtab:
548 sym.name = strtab[sym.st_name]
# Indexing the section indexes the list of symbols.
550 def __getitem__(self, key):
551 return self.symtab[key]
# SStrtab is a string table backed by two dictionaries: by_index maps an
# offset to a string, by_name maps a string to its offset.
554 class SStrtab(BaseSection):
555 """This one behaves in two completely different ways.
556 If it's given a section header and data, it will act as read-only, only to
557 be used for name resolution.
558 If it's not given any argument, it can be used to create a new Strtab."""
559 def __init__(self, shdr=None, data=None):
# Read-only exactly when a section header was supplied.
560 self.readonly = (shdr is not None)
564 BaseSection.__init__(self, shdr, data)
# Unknown until set from outside; Dynamic checks it against None.
565 self.virt_addr = None
567 def toBinArray(self):
# NOTE(review): BaseSection.toBinArray is called on the class without an
# instance; this looks like it should receive `self` -- confirm.
569 return BaseSection.toBinArray()
572 keys = self.by_index.keys()
# Every string is written followed by a NUL terminator.
575 ba.fromstring(self.by_index[k] + "\0")
581 if len(self.by_index) == 0:
583 return len(self.data)
589 return self.by_index.iteritems()
591 # Resolution functions
# Split the raw table on NUL bytes and record the strings in both
# dictionaries.
593 def fromBinArray(self, data):
594 BaseSection.fromBinArray(self, data)
595 itab = data.tostring().split('\0')
598 self.by_index[i] = sname
599 self.by_name[sname] = i
# Lookup goes both ways: an int key is an offset and yields a string, any
# other key is looked up in by_name.
602 def __getitem__(self, key):
603 if isinstance(key, int):
604 # Find string by index
605 if key in self.by_index:
606 # Already computed, return it
607 return self.by_index[key]
609 # It references a substring
# The string runs from offset `key` up to the next NUL; it is cached in
# both dictionaries.
610 v = self.data[key:].tostring().split('\0')[0]
611 self.by_index[key] = v
612 self.by_name[v] = key
616 if key in self.by_name:
617 return self.by_name[key]
621 # Executable creation functions
# Add `identifier` to the table and rebuild self.data from the
# dictionaries.
623 def append(self, identifier):
624 if len(self.by_name) == 0:
# The new string goes right after the string with the highest offset and
# its terminator.
627 last = max(self.by_index.keys())
628 offset = last + len(self.by_index[last]) + 1 # for the \0
629 self.by_index[offset] = identifier
630 self.by_name[identifier] = offset
631 self.data = self.toBinArray()
# SRela is a relocation section: the raw data is split into Elf64_Rela
# entries kept in self.relatab.
638 class SRela(BaseSection):
639 entsize = struct.calcsize(Elf64_Rela.format)
640 def __init__(self, shdr, data=None):
642 BaseSection.__init__(self, shdr, data)
# Cut `data` into consecutive chunks of `entsize` items and decode each
# chunk as an Elf64_Rela.
644 def fromBinArray(self, data):
645 BaseSection.fromBinArray(self, data)
# NOTE(review): relies on `/` being integer division (Python 2 semantics);
# range() below needs an int.
646 nument = len(data) / self.entsize
647 for i in range(nument):
648 start = i * self.entsize
649 end = i * self.entsize + self.entsize
650 self.relatab.append(Elf64_Rela(data[start:end]))
# Link every relocation entry to its symbol table entry, and record on the
# section header which section the relocations apply to.
652 def resolve_names(self, elf):
653 """Badly named, this wil resolve to a symtab entry..."""
654 # sh_link leads to the symtab
655 self.symtab = elf.shdrs[self.header.sh_link].content
656 # sh_info links to the section on which the relocation applies
657 self.header.target = elf.shdrs[self.header.sh_info]
658 for r in self.relatab:
659 r.symbol = self.symtab[r.r_sym]
663 class SHash(BaseSection):
667 class SDynamic(BaseSection):
671 class SNote(BaseSection):
675 class SNobits(BaseSection):
682 return self.header.sh_size
685 def toBinArray(self):
689 class SRel(BaseSection):
693 class SShlib(BaseSection):
697 class SDynsym(SSymtab):
# Elf64_Phdr models one program header.
701 class Elf64_Phdr(object):
703 size = struct.calcsize(format)
708 object.__init__(self)
# Defaults: type PT_NULL, flags PF_X + PF_W + PF_R.
709 self.p_type = PT_NULL
710 self.p_flags = PF_X + PF_W + PF_R
# Pack the eight header fields in the order expected by the class format.
718 def toBinArray(self):
719 res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset,
720 self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align)
# Copy offset, virtual address and sizes from `content`: the file size
# comes from its physical size, the memory size from its logical size.
726 def update_from_content(self, content):
727 """ Update ofset, address and sizes.
728 After having applied layout(),the content knows all these values."""
729 self.p_offset = content.file_offset
730 self.p_vaddr = content.virt_addr
731 self.p_filesz = content.physical_size
732 self.p_memsz = content.logical_size
# BaseSegment is an ordered collection of content objects kept in
# self.content.
735 class BaseSegment(object):
736 def __init__(self, align=0):
737 object.__init__(self)
# Append `content` at the end of the segment.
741 def add_content(self, content):
742 self.content.append(content)
# Serialise the segment by concatenating the binary form of every content
# object, in order.
744 def toBinArray(self):
746 for c in self.content:
747 ba.extend(c.toBinArray())
# Sum of the `size` attribute of all content objects.
753 return sum(c.size for c in self.content)
# TextSegment lays its content out sequentially, starting at the segment's
# own virtual address and file offset.
759 class TextSegment(BaseSegment):
760 def __init__(self, align=0):
761 BaseSegment.__init__(self, align)
764 virt_addr = self.virt_addr
765 file_offset = self.file_offset
766 for i in self.content:
767 i.virt_addr = virt_addr
768 i.file_offset = file_offset
# Addresses advance by the logical size, file offsets by the physical size.
770 virt_addr += i.logical_size
771 file_offset += i.physical_size
# DataSegment keeps, next to its regular content, a separate `nobits` list
# that is given addresses after the regular content.
774 class DataSegment(BaseSegment):
775 def __init__(self, align=0):
776 BaseSegment.__init__(self, align)
# Append `content` to the nobits list.
779 def add_nobits(self, content):
780 self.nobits.append(content)
783 virt_addr = self.virt_addr
784 file_offset = self.file_offset
785 for i in self.content:
786 i.virt_addr = virt_addr
787 i.file_offset = file_offset
789 virt_addr += i.logical_size
790 file_offset += i.physical_size
# In this loop only the virtual address is advanced, not the file offset.
791 for i in self.nobits:
792 i.virt_addr = virt_addr
795 virt_addr += i.logical_size
# Physical size plus the logical size of every nobits entry.
800 return self.physical_size + sum(c.logical_size for c in self.nobits)
# Dynamic holds a list of Elf64_Dyn entries (self.dyntab) and a string
# table of its own (self.strtab).
804 class Dynamic(object):
806 object.__init__(self)
808 self.strtab = SStrtab()
813 # End the table with a DT_NULL without associated value.
# Size: one full entry per dyntab item plus a single "Q" for the
# terminator.
814 return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q"))
# Add `shlib` to the string table and reference it from a DT_NEEDED entry.
# NOTE(review): this expects SStrtab.append() to return the offset of the
# new string -- confirm.
819 def add_shlib(self, shlib):
820 offset = self.strtab.append(shlib)
821 self.dyntab.append(Elf64_Dyn(DT_NEEDED, offset))
# Add a DT_SYMTAB entry whose value is `vaddr`.
823 def add_symtab(self, vaddr):
824 self.dyntab.append(Elf64_Dyn(DT_SYMTAB, vaddr))
# Add a DT_DEBUG entry with value 0.
827 self.dyntab.append(Elf64_Dyn(DT_DEBUG, 0))
830 # Adjust the address of the strtab, if
831 if self.strtab.virt_addr is None:
832 print "Ooops, strtab's address is not known yet. Aborting."
835 self.dyntab.append(Elf64_Dyn(DT_STRTAB, self.strtab.virt_addr))
838 def dt_debug_address():
# Result: this object's virtual address, plus the offset of the first
# DT_DEBUG entry, plus half the size of an entry.
840 for i, d in enumerate(self.dyntab):
841 if d.d_tag == DT_DEBUG:
842 return self.virt_addr + (i*d.size + (d.size/2))
# Serialise every entry in order; DT_NULL is packed as a single
# little-endian "Q".
846 def toBinArray(self):
848 for d in self.dyntab:
849 ba.extend(d.toBinArray())
850 null = struct.pack("<Q", DT_NULL)
# Interpreter wraps the interpreter path string; the path falls back to
# the class attribute default_interpreter.
855 class Interpreter(object):
857 Pseudo-section containing the null terminated string referencing the
860 @ivar size: Read-only attribute, size of the null terminated string.
861 @ivar logical_size: alias to size
862 @ivar physical_size: alias to size
864 default_interpreter = "/lib64/ld-linux-x86-64.so.2"
866 def __init__(self, interpreter=None):
868 @param interpreter: The interpreter ot use. Defaults to
869 "/lib64/ld-linux-x86-64.so.2", as per the specs.
870 @type interpreter: string
872 object.__init__(self)
874 self.interpreter = interpreter
876 self.interpreter = self.default_interpreter
# Length of the path plus one for the terminator.
882 return len(self.interpreter) + 1
887 def toBinArray(self):
889 @return: a L{BinArray} with the content of the pseudo-section.
891 ba = BinArray(self.interpreter)