1 # -*- coding: utf-8 -*-
2 # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
4 # Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
6 # This file is part of bold, the Byte Optimized Linker.
8 # You can redistribute this file and/or modify it under the terms of the
9 # GNU General Public License as published by the Free Software Foundation,
10 # either version 3 of the License or (at your option) any later version.
13 from BinArray import BinArray
14 from constants import *
def nested_property(c):
  """Build a property from a factory function.

  ``c`` is called with no argument and must return a mapping whose keys are
  accepted by ``property`` as keyword arguments (``fget``, ``fset``, ``fdel``,
  ``doc``). The mapping is forwarded unchanged.
  """
  accessors = c()
  return property(**accessors)
23 #--------------------------------------------------------------------------
25 #--------------------------------------------------------------------------
28 """Handles an Elf64 object."""
29 interpreter = "/lib64/ld-linux-x86-64.so.2"
31 def __init__(self, path=None):
33 self.header = Elf64_Ehdr()
34 self.header.owner = self
40 self.local_symbols = {}
41 self.global_symbols = {}
42 self.undefined_symbols = []
43 self.common_symbols = []
49 # Functions for relocatable files used as input
51 def fromfile(self, path):
56 data.fromfile(f, Elf64_Ehdr.size)
57 self.header.fromBinArray(data)
59 # This linker only supports relocatable objects
60 if self.header.e_type != ET_REL:
61 raise NotRelocatableObject(path)
63 if self.header.e_ident.ei_class != ELFCLASS64:
64 raise UnsupportedObject(path, "Not %s" % ELFCLASS64)
66 if self.header.e_machine != EM_X86_64:
67 raise UnsupportedObject(path, "Not %s" % EM_X86_64)
69 # Load section headers
70 f.seek(self.header.e_shoff)
71 for i in range(self.header.e_shnum):
73 data.fromfile(f, self.header.e_shentsize)
74 h = Elf64_Shdr(i, data)
78 # Read section contents
81 if sh.sh_type != SHT_NOBITS:
83 data.fromfile(f, sh.sh_size)
88 def resolve_names(self):
89 # The .shstrtab index is in the ELF header. Find the section names
90 strtab = self.shdrs[self.header.e_shstrndx].content
93 sh.name = strtab[int(sh.sh_name)]
94 self.sections[sh.name] = sh
96 # And resolve names in the section itself
100 def find_symbols(self):
101 for sh in self.shdrs:
102 if sh.sh_type == SHT_SYMTAB:
103 symtab = sh.content.symtab
105 for symbol in symtab:
106 if symbol.st_type == STT_FILE:
108 if symbol.st_shndx == SHN_ABS:
110 if symbol.st_shndx == SHN_COMMON:
112 sym = (symbol.name, symbol.st_size, symbol.st_value)
113 self.common_symbols.append(sym)
115 if symbol.st_shndx == SHN_UNDEF:
117 self.undefined_symbols.append(symbol.name)
120 target_section = self.shdrs[symbol.st_shndx]
122 symbol_name = symbol.name
123 value = symbol.st_value
124 bind = symbol.st_binding
126 # We got a name, a target section, and an offset in the section
127 if symbol.st_binding == STB_LOCAL:
128 if symbol.st_type == STT_SECTION:
129 symbol_name = target_section.name
130 self.local_symbols[symbol_name] = (target_section, value)
132 self.global_symbols[symbol_name] = (target_section, value)
134 def apply_relocation(self, all_global_symbols):
135 # find relocation tables
136 relocations = [sh for sh in self.shdrs if sh.sh_type in [SHT_REL, SHT_RELA]]
137 for sh in relocations:
138 target = sh.target.content
140 for reloc in sh.content.relatab:
141 if reloc.symbol.st_shndx in [SHN_UNDEF, SHN_COMMON]:
142 # This is an extern or common symbol, find it in all_global_symbols
143 sym_address = all_global_symbols[reloc.symbol.name]
145 # source == in which section it is defined
146 source = self.shdrs[reloc.symbol.st_shndx].content
147 sym_address = source.virt_addr + reloc.symbol.st_value
149 target_ba = target.data # The actual BinArray that we'll modify
150 pc_address = target.virt_addr + reloc.r_offset
152 if reloc.r_type == R_X86_64_64:
153 format = "<Q" # Direct 64 bit address
154 target_value = sym_address + reloc.r_addend
155 elif reloc.r_type == R_X86_64_PC32:
156 format = "<i" # PC relative 32 bit signed
157 target_value = sym_address + reloc.r_addend - pc_address
158 elif reloc.r_type == R_X86_64_32:
159 format = "<I" # Direct 32 bit zero extended
160 target_value = sym_address + reloc.r_addend
161 elif reloc.r_type == R_X86_64_32S:
162 format = "<i" # Direct 32 bit sign extended
163 target_value = sym_address + reloc.r_addend
164 elif reloc.r_type == R_X86_64_PC16:
165 format = "<h" # 16 bit sign extended pc relative
166 target_value = sym_address + reloc.r_addend - pc_address
167 elif reloc.r_type == R_X86_64_16:
168 format = "<H" # Direct 16 bit zero extended
169 target_value = sym_address + reloc.r_addend
170 elif reloc.r_type == R_X86_64_PC8:
171 format = "b" # 8 bit sign extended pc relative
172 target_value = sym_address + reloc.r_addend - pc_address
173 elif reloc.r_type == R_X86_64_8:
174 format = "b" # Direct 8 bit sign extended
175 target_value = sym_address + reloc.r_addend
177 print "Unsupported relocation type: %s" % reloc.r_type
180 d = BinArray(struct.pack(format, target_value))
181 start = reloc.r_offset
183 target_ba[start:end] = d
186 # Functions for executable files, as output
def add_phdr(self, phdr):
  """Register a program header and refresh the count kept in the ELF header."""
  headers = self.phdrs
  headers.append(phdr)
  # e_phnum always mirrors the number of registered program headers.
  self.header.e_phnum = len(headers)
def add_segment(self, segment):
  """Add ``segment`` at the end of this object's segment list."""
  known_segments = self.segments
  known_segments.append(segment)
196 def layout(self, base_vaddr):
197 """Do the actual layout for final executable."""
199 virt_addr = base_vaddr
201 self.virt_addr = base_vaddr
202 self.file_offset = file_offset
203 for s in self.segments:
205 s.virt_addr = virt_addr
206 s.file_offset = file_offset
208 virt_addr += s.logical_size
209 file_offset += s.physical_size
211 def toBinArray(self):
213 for s in self.segments:
214 ba.extend(s.toBinArray())
218 #--------------------------------------------------------------------------
220 #--------------------------------------------------------------------------
222 class Elf64_eident(object):
223 """Detailed representation for the Elf identifier."""
225 size = struct.calcsize(format)
229 def __init__(self, rawdata=None):
230 object.__init__(self)
231 if rawdata is not None:
232 self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Decode the ELF identification bytes held in ``rawdata``.

  Bytes 0-3 are kept as the magic, bytes 4 to 7 are wrapped in ``ElfClass``,
  ``ElfData``, ``ElfVersion`` and ``ElfOsAbi``. Byte 8 (ABI version) and
  bytes 9-15 (padding) are now read from ``rawdata`` instead of being
  replaced by zeros, so a decoded identifier keeps what the file contained.

  ``rawdata`` is assumed to be an indexable sequence of integer byte values
  (presumably a BinArray, as passed by Elf64_Ehdr) -- TODO confirm.

  Raises ``struct.error`` when ``rawdata`` does not have the size described
  by ``self.format``.
  """
  # The unpacked tuple itself is not needed; the call is kept because it
  # rejects input whose size does not match the format.
  struct.unpack(self.format, rawdata)
  self.ei_magic = rawdata[:4]
  self.ei_class = ElfClass(rawdata[4])
  self.ei_data = ElfData(rawdata[5])
  self.ei_version = ElfVersion(rawdata[6])
  self.ei_osabi = ElfOsAbi(rawdata[7])
  self.ei_abiversion = rawdata[8]
  # Kept as a plain list, like the default padding, so that it can be
  # passed to extend() when the identifier is serialised again.
  self.ei_pad = list(rawdata[9:16])
def make_default_amd64(self):
  """Fill every ``ei_*`` field with the default AMD64 identification.

  The magic is the byte sequence 0x7f 'E' 'L' 'F'; class, data encoding,
  version and OS ABI come from the ELFCLASS64, ELFDATA2LSB, EV_CURRENT and
  ELFOSABI_SYSV constants; ABI version and the seven padding bytes are zero.
  """
  defaults = (
    ("ei_magic", BinArray([0x7f, 0x45, 0x4c, 0x46])),
    ("ei_class", ELFCLASS64),
    ("ei_data", ELFDATA2LSB),
    ("ei_version", EV_CURRENT),
    ("ei_osabi", ELFOSABI_SYSV),
    ("ei_abiversion", 0),
    ("ei_pad", [0] * 7),
  )
  for field, value in defaults:
    setattr(self, field, value)
253 def toBinArray(self):
254 ba = BinArray(self.ei_magic)
255 ba.append(self.ei_class)
256 ba.append(self.ei_data)
257 ba.append(self.ei_version)
258 ba.append(self.ei_osabi)
259 ba.append(self.ei_abiversion)
260 ba.extend(self.ei_pad)
264 class Elf64_Ehdr(object):
265 """Elf file header"""
266 format = "<16B 2H I 3Q I 6H"
267 size = struct.calcsize(format)
271 def __init__(self, rawdata=None):
272 object.__init__(self)
273 self.e_ident = Elf64_eident()
274 self.e_type = ET_NONE
275 self.e_machine = EM_X86_64
276 self.e_version = EV_CURRENT
281 self.e_ehsize = self.size
282 self.e_phentsize = Elf64_Phdr.size
284 self.e_shentsize = Elf64_Shdr.size
287 if rawdata is not None:
288 self.fromBinArray(rawdata)
290 def fromBinArray(self, rawdata):
291 t = struct.unpack(self.format, rawdata)
292 self.e_ident = Elf64_eident(BinArray(rawdata[:16]))
293 self.e_type = ElfType(t[16])
294 self.e_machine = ElfMachine(t[17])
295 self.e_version = ElfVersion(t[18])
300 self.e_ehsize = t[23]
301 self.e_phentsize = t[24]
303 self.e_shentsize = t[26]
305 self.e_shstrndx = t[28]
307 def toBinArray(self):
308 # Build a list from e_ident and all other fields, to feed struct.pack.
309 values = self.e_ident.toBinArray().tolist()
310 values.extend([self.e_type, self.e_machine, self.e_version, self.e_entry,
311 self.e_phoff, self.e_shoff, self.e_flags, self.e_ehsize, self.e_phentsize,
312 self.e_phnum, self.e_shentsize, self.e_shnum, self.e_shstrndx])
313 res = struct.pack(self.format, *values)
320 #--------------------------------------------------------------------------
322 #--------------------------------------------------------------------------
324 class Elf64_Shdr(object):
325 """Elf64 section header."""
326 format = "<2I 4Q 2I 2Q"
327 size = struct.calcsize(format)
331 def __init__(self, index=None, rawdata=None):
332 object.__init__(self)
334 if rawdata is not None:
335 self.fromBinArray(rawdata)
337 def fromBinArray(self, rawdata):
338 t = struct.unpack(self.format, rawdata)
340 self.sh_type = ElfShType(t[1])
343 self.sh_offset = t[4]
347 self.sh_addralign = t[8]
348 self.sh_entsize = t[9]
def resolve_names(self):
  """Let this header's content resolve its names against the owning object."""
  section = self.content
  section.resolve_names(self.owner)
357 def fset(self, data):
358 """Use the Section factory to get the subclass corresponding to the
359 section type specified in this header."""
360 self._content = Section(self, data)
363 # For sections that contain elements of specific types :
365 class Elf64_Sym(object):
366 """Symbol Table entry"""
367 format = "<I 2B H 2Q "
368 entsize = struct.calcsize(format)
369 def __init__(self, rawdata=None):
370 object.__init__(self)
371 if rawdata is not None:
372 self.fromBinArray(rawdata)
377 return ElfSymbolBinding((self.st_info >> 4) & 0x0f)
378 def fset(self, value):
379 self.st_info = (((value & 0x0f) << 4) | (self.st_info & 0x0f))
385 return ElfSymbolType(self.st_info & 0x0f)
386 def fset(self, value):
387 self.st_info = ((self.st_info & 0xf0) | (value & 0x0f))
393 return ElfSymbolVisibility(self.st_other & 0x03)
394 def fset(self, value):
395 self.st_other = ((self.st_other & 0xfc) | (value & 0x03))
398 def fromBinArray(self, rawdata):
399 t = struct.unpack(self.format, rawdata)
400 self.st_name = t[0] # index in the strtab pointed by sh_link
403 self.st_shndx = ElfSectionIndex(t[3])
408 class Elf64_Rel(object):
410 def __init__(self, rawdata=None):
411 object.__init__(self)
412 self.r_addend = 0 # No addend in a Rel.
413 if rawdata is not None:
414 self.fromBinArray(rawdata)
def fromBinArray(self, rawdata):
  """Unpack ``rawdata`` according to this class's ``format``.

  Raises ``struct.error`` when ``rawdata`` does not have the size described
  by ``self.format``.
  """
  # The receiver used to be spelled ``sef``; the body refers to ``self``, so
  # every call failed with a NameError before reaching struct.unpack.
  t = struct.unpack(self.format, rawdata)
424 return (self.r_info >> 32) & 0xffffffff
425 def fset(self, value):
426 self.r_info = ((value & 0xffffffff) << 32) | (self.r_info & 0xffffffff)
432 return Amd64Relocation(self.r_info & 0xffffffff)
433 def fset(self, value):
434 self.r_info = (self.r_info & 0xffffffff00000000) | (value & 0xffffffff)
438 class Elf64_Rela(Elf64_Rel):
def __init__(self, rawdata=None):
  """Create a Rela entry; all the work is delegated to ``Elf64_Rel``."""
  super(Elf64_Rela, self).__init__(rawdata)
443 def fromBinArray(self, rawdata):
444 t = struct.unpack(self.format, rawdata)
450 class Elf64_Dyn(object):
452 size = struct.calcsize(format)
453 def __init__(self, tag, value):
454 object.__init__(self)
462 def fset(self, value):
466 def toBinArray(self):
468 ba.fromstring(struct.pack(self.format, self.d_tag, self.d_val))
473 def Section(shdr, data=None):
474 """A section factory"""
477 SHT_PROGBITS: SProgBits,
482 SHT_DYNAMIC: SDynamic,
489 if shdr.sh_type in dataclass:
490 return dataclass[shdr.sh_type](shdr, data)
492 return BaseSection(shdr, data)
495 class BaseSection(object):
496 def __init__(self, shdr, rawdata=None):
497 object.__init__(self)
500 if rawdata is not None:
501 self.fromBinArray(rawdata)
503 def fromBinArray(self, rawdata):
506 def toBinArray(self):
512 def resolve_names(self, elf):
513 """Nothing to resolve."""
519 return len(self.data)
class SNull(BaseSection):
  """Section that never carries content."""

  def __init__(self, shdr, data=None):
    """Initialise from ``shdr``; ``data`` is accepted but always ignored."""
    # None is forwarded in place of ``data`` on purpose.
    super(SNull, self).__init__(shdr, None)
class SProgBits(BaseSection):
  """Section whose header and data are handled entirely by ``BaseSection``."""

  def __init__(self, shdr, data=None):
    """Forward ``shdr`` and ``data`` unchanged to ``BaseSection``."""
    super(SProgBits, self).__init__(shdr, data)
538 class SSymtab(BaseSection):
539 entsize = struct.calcsize(Elf64_Sym.format)
540 def __init__(self, shdr, data=None):
542 BaseSection.__init__(self, shdr, data)
def fromBinArray(self, data):
  """Store ``data`` and split it into ``Elf64_Sym`` entries.

  ``data`` is first handed to ``BaseSection.fromBinArray``, then cut into
  consecutive slices of ``self.entsize`` items; each slice is decoded as an
  ``Elf64_Sym`` and appended to ``self.symtab``. Items left over after the
  last complete entry are not decoded.
  """
  BaseSection.fromBinArray(self, data)
  entsize = self.entsize
  # Floor division keeps the entry count an integer even when ``/`` means
  # true division (Python 3, or ``from __future__ import division``), where
  # range() would otherwise reject the float.
  for index in range(len(data) // entsize):
    start = index * entsize
    self.symtab.append(Elf64_Sym(data[start:start + entsize]))
def resolve_names(self, elf):
  """Give every symbol of the table its ``name`` attribute.

  The string table is the content of the section header found at index
  ``sh_link`` in ``elf.shdrs``; each symbol's ``st_name`` is looked up in it.
  """
  names = elf.shdrs[self.header.sh_link].content
  for entry in self.symtab:
    entry.name = names[entry.st_name]
559 def __getitem__(self, key):
560 return self.symtab[key]
563 class SStrtab(BaseSection):
564 """This one behaves in two completely different ways.
565 If it's given a section header and data, it will act as read-only, only to
566 be used for name resolution.
567 If it's not given any argument, it can be used to create a new Strtab."""
568 def __init__(self, shdr=None, data=None):
569 self.readonly = (shdr is not None)
573 BaseSection.__init__(self, shdr, data)
574 self.virt_addr = None
576 def toBinArray(self):
578 return BaseSection.toBinArray()
581 keys = self.by_index.keys()
584 ba.fromstring(self.by_index[k] + "\0")
590 if len(self.by_index) == 0:
592 return len(self.data)
598 return self.by_index.iteritems()
600 # Resolution functions
602 def fromBinArray(self, data):
603 BaseSection.fromBinArray(self, data)
604 itab = data.tostring().split('\0')
607 self.by_index[i] = sname
608 self.by_name[sname] = i
611 def __getitem__(self, key):
612 if isinstance(key, int):
613 # Find string by index
614 if key in self.by_index:
615 # Already computed, return it
616 return self.by_index[key]
618 # It references a substring
619 v = self.data[key:].tostring().split('\0')[0]
620 self.by_index[key] = v
621 self.by_name[v] = key
625 if key in self.by_name:
626 return self.by_name[key]
630 # Executable creation functions
632 def append(self, identifier):
633 if len(self.by_name) == 0:
636 last = max(self.by_index.keys())
637 offset = last + len(self.by_index[last]) + 1 # for the \0
638 self.by_index[offset] = identifier
639 self.by_name[identifier] = offset
640 self.data = self.toBinArray()
647 class SRela(BaseSection):
648 entsize = struct.calcsize(Elf64_Rela.format)
649 def __init__(self, shdr, data=None):
651 BaseSection.__init__(self, shdr, data)
def fromBinArray(self, data):
  """Store ``data`` and split it into ``Elf64_Rela`` entries.

  ``data`` is first handed to ``BaseSection.fromBinArray``, then cut into
  consecutive slices of ``self.entsize`` items; each slice is decoded as an
  ``Elf64_Rela`` and appended to ``self.relatab``. Items left over after the
  last complete entry are not decoded.
  """
  BaseSection.fromBinArray(self, data)
  entsize = self.entsize
  # Floor division keeps the entry count an integer even when ``/`` means
  # true division (Python 3, or ``from __future__ import division``), where
  # range() would otherwise reject the float.
  for index in range(len(data) // entsize):
    start = index * entsize
    self.relatab.append(Elf64_Rela(data[start:start + entsize]))
def resolve_names(self, elf):
  """Bind the table to its symbol table, its target and its symbols.

  Despite the method name, nothing is looked up in a string table here:
  each relocation gets a ``symbol`` attribute taken from the symbol table.
  """
  header = self.header
  sections = elf.shdrs
  # sh_link is the index of the symbol table used by these relocations.
  self.symtab = sections[header.sh_link].content
  # sh_info is the index of the section the relocations apply to.
  header.target = sections[header.sh_info]
  for entry in self.relatab:
    entry.symbol = self.symtab[entry.r_sym]
672 class SHash(BaseSection):
676 class SDynamic(BaseSection):
680 class SNote(BaseSection):
684 class SNobits(BaseSection):
691 return self.header.sh_size
694 def toBinArray(self):
698 class SRel(BaseSection):
702 class SShlib(BaseSection):
706 class SDynsym(SSymtab):
710 class Elf64_Phdr(object):
712 size = struct.calcsize(format)
717 object.__init__(self)
718 self.p_type = PT_NULL
719 self.p_flags = PF_X + PF_W + PF_R
727 def toBinArray(self):
728 res = struct.pack(self.format, self.p_type, self.p_flags, self.p_offset,
729 self.p_vaddr, self.p_paddr, self.p_filesz, self.p_memsz, self.p_align)
def update_from_content(self, content):
  """Copy offset, address and both sizes from ``content``.

  ``content`` must expose ``file_offset``, ``virt_addr``, ``physical_size``
  and ``logical_size``; these are expected to be known once layout() has
  been applied.
  """
  field_sources = (
    ("p_offset", "file_offset"),
    ("p_vaddr", "virt_addr"),
    ("p_filesz", "physical_size"),
    ("p_memsz", "logical_size"),
  )
  for field, source in field_sources:
    setattr(self, field, getattr(content, source))
744 class BaseSegment(object):
745 def __init__(self, align=0):
746 object.__init__(self)
def add_content(self, content):
  """Add ``content`` at the end of this segment's content list."""
  parts = self.content
  parts.append(content)
753 def toBinArray(self):
755 for c in self.content:
756 ba.extend(c.toBinArray())
762 return sum(c.size for c in self.content)
768 class TextSegment(BaseSegment):
def __init__(self, align=0):
  """Create the segment; ``align`` is passed on to ``BaseSegment``."""
  super(TextSegment, self).__init__(align)
773 virt_addr = self.virt_addr
774 file_offset = self.file_offset
775 for i in self.content:
776 i.virt_addr = virt_addr
777 i.file_offset = file_offset
779 virt_addr += i.logical_size
780 file_offset += i.physical_size
783 class DataSegment(BaseSegment):
784 def __init__(self, align=0):
785 BaseSegment.__init__(self, align)
def add_nobits(self, content):
  """Add ``content`` at the end of this segment's nobits list."""
  pending = self.nobits
  pending.append(content)
792 virt_addr = self.virt_addr
793 file_offset = self.file_offset
794 for i in self.content:
795 i.virt_addr = virt_addr
796 i.file_offset = file_offset
798 virt_addr += i.logical_size
799 file_offset += i.physical_size
800 for i in self.nobits:
801 i.virt_addr = virt_addr
804 virt_addr += i.logical_size
809 return self.physical_size + sum(c.logical_size for c in self.nobits)
813 class Dynamic(object):
815 object.__init__(self)
817 self.strtab = SStrtab()
822 # End the table with a DT_NULL without associated value.
823 return (Elf64_Dyn.size * len(self.dyntab) + struct.calcsize("Q"))
def add_shlib(self, shlib):
  """Declare ``shlib`` as a needed library.

  The name is appended to the string table, and the value returned by that
  call becomes the value of a new DT_NEEDED entry in the dynamic table.
  """
  name_offset = self.strtab.append(shlib)
  register = self.dyntab.append
  register(Elf64_Dyn(DT_NEEDED, name_offset))
def add_symtab(self, vaddr):
  """Add a DT_SYMTAB entry whose value is ``vaddr`` to the dynamic table."""
  register = self.dyntab.append
  register(Elf64_Dyn(DT_SYMTAB, vaddr))
836 self.dyntab.append(Elf64_Dyn(DT_DEBUG, 0))
839 # Adjust the address of the strtab, if
840 if self.strtab.virt_addr is None:
841 print "Ooops, strtab's address is not known yet. Aborting."
844 self.dyntab.append(Elf64_Dyn(DT_STRTAB, self.strtab.virt_addr))
847 def dt_debug_address():
849 for i, d in enumerate(self.dyntab):
850 if d.d_tag == DT_DEBUG:
851 return self.virt_addr + (i*d.size + (d.size/2))
855 def toBinArray(self):
857 for d in self.dyntab:
858 ba.extend(d.toBinArray())
859 null = struct.pack("<Q", DT_NULL)
864 class Interpreter(object):
866 Pseudo-section containing the null terminated string referencing the
869 @ivar size: Read-only attribute, size of the null terminated string.
870 @ivar logical_size: alias to size
871 @ivar physical_size: alias to size
873 default_interpreter = "/lib64/ld-linux-x86-64.so.2"
875 def __init__(self, interpreter=None):
877 @param interpreter: The interpreter to use. Defaults to
878 "/lib64/ld-linux-x86-64.so.2", as per the specs.
879 @type interpreter: string
881 object.__init__(self)
883 self.interpreter = interpreter
885 self.interpreter = self.default_interpreter
891 return len(self.interpreter) + 1
896 def toBinArray(self):
898 @return: a L{BinArray} with the content of the pseudo-section.
900 ba = BinArray(self.interpreter)