2 # -*- coding: utf-8 -*-
3 # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
5 # Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
7 # This file is part of bold, the Byte Optimized Linker.
9 # You can redistribute this file and/or modify it under the terms of the
10 # GNU Lesser General Public License as published by the Free Software
11 # Foundation, version 2.1.
"""Main entry point for the bold linker."""
17 from constants import *
18 from BinArray import BinArray
19 from elf import Elf64, Elf64_Phdr, Elf64_Shdr, TextSegment, DataSegment
20 from elf import SStrtab, SSymtab, SProgBits, SNobits, Dynamic, Interpreter
22 from ctypes.util import find_library
def hash_name(name):
  """Calculate the 32-bit hash of a function name.

  Starting from zero, every character ``c`` of *name* updates the running
  value ``h`` to ``ord(c) - h + (h << 6) + (h << 16)``, truncated to 32 bits.

  Args:
    name: The symbol name to hash, as a string.

  Returns:
    The hash as an integer in the range 0..0xffffffff (0 for an empty name).
  """
  h = 0
  for c in name:
    # '+' and '-' bind tighter than '&', so the mask has always applied to the
    # whole sum; the parentheses only make that explicit.
    h = (ord(c) - h + (h << 6) + (h << 16)) & 0xffffffff
  return h
# BoldLinker gathers relocatable inputs and shared-library names and arranges
# them into an executable (see the docstring below).
# NOTE(review): in this view the class docstring stops mid-sentence and the
# constructor header is not visible; only the three attribute assignments at
# the end of this span are -- confirm against the complete file.
32 class BoldLinker(object):
33 """A Linker object takes one or more objects files, optional shared libs,
34 and arranges all this in an executable.
36 Important note: the external functions from the libraries are NOT resolved.
37 This import is left to the user, as it can be done more efficiently by hash.
38 (http://www.linuxdemos.org/contentarticle/how_to_start_4k_introdev_with_ibh)
39 For this, a very useful symbol is exported, : _dt_debug, the address of the
# Name of the symbol whose resolved address becomes the ELF entry point: it
# is looked up in self.global_symbols and stored in e_entry during linking.
48 self.entry_point = "_start"
# Symbol name -> final virtual address; entries stay None until resolved.
50 self.global_symbols = {}
# Names that the inputs reference without defining them.
51 self.undefined_symbols = set()
53 def add_object(self, filename):
54 """Add a relocatable file as input."""
# filename: presumably the path of the relocatable object to load -- confirm.
# NOTE(review): the statements that read and register the file are not
# visible in this view, so their behaviour cannot be documented from here.
60 def build_symbols_tables(self):
61 """Find out the globally available symbols, as well as the globally
62 undefined ones (which should be found in external libraries."""
# Works in place on self.undefined_symbols and self.global_symbols.
# Raises RedefinedSymbol when a global name is already present in
# self.global_symbols by the time it is declared again.
64 # Gather the "extern" symbols from each input file.
# NOTE(review): the loop header that binds `i` is not visible in this view;
# presumably it walks the input objects -- confirm.
66 self.undefined_symbols.update(i.undefined_symbols)
68 # Make a dict with all the symbols declared globally.
69 # Key is the symbol name, value will later be set to the final
70 # virtual address. Currently, we're only interested in the declaration.
71 # The virtual addresses are set to None, they'll be resolved later.
# NOTE(review): a second loop header binding `i` is not visible here either.
73 for s in i.global_symbols:
74 if s in self.global_symbols:
75 raise RedefinedSymbol(s)
76 self.global_symbols[s] = None
78 # Add a few useful symbols. They'll be resolved later as well.
79 self.global_symbols["_dt_debug"] = None
80 self.global_symbols["_DYNAMIC"] = None
82 # Find out which symbols aren't really defined anywhere
# Whatever one of the globals above provides is no longer undefined.
83 self.undefined_symbols.difference_update(self.global_symbols)
86 def build_external(self, with_jump=False, align_jump=True):
# Builds an in-memory Elf64 object ("Internal dynamic linker") holding one
# hash and one 8-byte slot per undefined symbol, plus jump stubs in .text.
# NOTE(review): the branches testing with_jump / align_jump are not visible
# in this view. Presumably with_jump decides whether the .text stubs and
# their relocations are emitted, and align_jump picks the 8-byte stub over
# the 6-byte one -- confirm against the complete file.
88 Generate a fake relocatable object, for dynamic linking.
91 # Find out all the undefined symbols. They're the one we'll need to resolve
# Sorted so that the index of each symbol is stable across the tables below.
93 symbols = sorted(list(self.undefined_symbols))
95 # Those three will soon be known...
# list.remove() raises ValueError if one of these names is not in the list.
96 symbols.remove('_bold__functions_count')
97 symbols.remove('_bold__functions_hash')
98 symbols.remove('_bold__functions_pointers')
100 # Create the fake ELF object.
101 fo = Elf64() # Don't care about most parts of ELF header (?)
102 fo.filename = "Internal dynamic linker"
104 # We need a .data section, a .bss section and possibly a .text section
# .data: one little-endian 32-bit hash ("<I") per symbol, hence 4 bytes each.
105 data_shdr = Elf64_Shdr()
106 data_shdr.sh_type = SHT_PROGBITS
107 data_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC)
108 data_shdr.sh_size = len(symbols) * 4
109 fmt = "<" + "I" * len(symbols)
110 data_shdr.content = BinArray(struct.pack(fmt, *[hash_name(s) for s in symbols]))
111 fo.shdrs.append(data_shdr)
112 fo.sections['.data'] = data_shdr
# .bss: 8 bytes reserved per symbol, with no file content (SHT_NOBITS).
114 bss_shdr = Elf64_Shdr()
115 bss_shdr.sh_type = SHT_NOBITS
116 bss_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC)
117 bss_shdr.sh_size = len(symbols) * 8
118 bss_shdr.content = BinArray("")
119 fo.shdrs.append(bss_shdr)
120 fo.sections['.bss'] = bss_shdr
# .text: one jump stub per symbol.
123 text_shdr = Elf64_Shdr()
124 text_shdr.sh_type = SHT_PROGBITS
125 text_shdr.sh_flags = (SHF_ALLOC | SHF_EXECINSTR)
# NOTE(review): the size assumes 8 bytes per stub, although one of the two
# stub encodings below is only 6 bytes long -- confirm this is intended.
126 text_shdr.sh_size = len(symbols) * 8
# Two alternative stub encodings: the first is the second one followed by
# two zero bytes. The statements choosing between them are not visible.
128 fmt = '\xff\x25\x00\x00\x00\x00\x00\x00' # ff 25 = jmp [rel label]
131 fmt = '\xff\x25\x00\x00\x00\x00'
133 text_shdr.content = BinArray(fmt * len(symbols))
134 fo.shdrs.append(text_shdr)
135 fo.sections['.text'] = text_shdr
137 # Cheating here. All symbols declared as global so we don't need to create
138 # a symtab from scratch.
# Each entry maps a name to a (section, offset) pair; SHN_ABS in the section
# position marks an absolute value.
139 fo.global_symbols = {}
140 fo.global_symbols['_bold__functions_count'] = (SHN_ABS, len(symbols))
141 fo.global_symbols['_bold__functions_hash'] = (data_shdr, 0)
142 fo.global_symbols['_bold__functions_pointers'] = (bss_shdr, 0)
144 for n, i in enumerate(symbols):
145 # The hash is always in .data
146 h = "_bold__hash_%s" % i
147 fo.global_symbols[h] = (data_shdr, n * 4) # Section, offset
# NOTE(review): the condition separating the two cases below is not visible,
# nor are the assignments of `jmp_size` and `p` -- confirm.
150 # the symbol is in .text, can be called directly
151 fo.global_symbols[i] = (text_shdr, n * jmp_size)
152 # another symbol can be used to reference the pointer, just in case.
154 fo.global_symbols[p] = (bss_shdr, n * 8)
157 # The symbol is in .bss, must be called indirectly
158 fo.global_symbols[i] = (bss_shdr, n * 8)
161 # Add relocation entries for the jumps
162 # Relocation will be done for the .text, for every jmp instruction.
164 rela_shdr = Elf64_Shdr()
165 rela_shdr.sh_type = SHT_RELA
166 # rela_shdr.sh_info = fo.shdrs.index(text_shdr)
167 rela_shdr.target = text_shdr
168 rela_shdr.sh_flags = 0
169 rela_shdr._content = dummy() # We only need a container for relatab...
170 relatab = [] # Prepare a relatab
171 rela_shdr.content.relatab = relatab
173 for n, i in enumerate(symbols):
174 # Create a relocation entry for each symbol
# NOTE(review): the creation of `reloc` is not visible in this view, and a
# further statement between r_offset and r_type is also absent.
# The +2 skips the two opcode bytes (ff 25) at the start of each stub.
176 reloc.r_offset = (n * jmp_size) + 2 # Beginning of the cell to update
178 reloc.r_type = R_X86_64_PC32
179 reloc.symbol = dummy()
180 reloc.symbol.st_shndx = SHN_UNDEF
181 reloc.symbol.name = "_bold__%s" % i
182 # reloc.symbol.st_value = 0
183 relatab.append(reloc)
184 fo.shdrs.append(rela_shdr)
185 fo.sections['.rela.text'] = rela_shdr
187 # Ok, let's add this fake object
# NOTE(review): the statement that registers `fo` is not visible in this view.
def add_shlib(self, libname):
  """Add a shared library to link against.

  Args:
    libname: Library name, in the form ctypes.util.find_library() expects.

  Raises:
    LibNotFound: If find_library() cannot locate the library.
  """
  # Note : we use ctypes' find_library to find the real name
  fullname = find_library(libname)
  # find_library() returns None when nothing matches. Only that case is an
  # error; otherwise the resolved name is recorded.
  if not fullname:
    raise LibNotFound(libname)
  self.shlibs.append(fullname)
# NOTE(review): the `def` line of this method is not visible in this view.
# The visible statements build the text and data segments, the program
# headers, the interpreter and dynamic sections, lay everything out from
# base address 0x400000, resolve symbol addresses, apply relocations and set
# the entry point. Raises UndefinedSymbol when self.entry_point is unknown.
200 """Do the actual linking."""
201 # Prepare two segments. One for .text, the other for .data + .bss
202 self.text_segment = TextSegment()
203 # .data will be mapped 0x100000 bytes further
204 self.data_segment = DataSegment(align=0x100000)
205 self.output.add_segment(self.text_segment)
206 self.output.add_segment(self.data_segment)
208 # Adjust the ELF header
209 self.output.header.e_ident.make_default_amd64()
# Program headers start right after the ELF header.
210 self.output.header.e_phoff = self.output.header.size
211 self.output.header.e_type = ET_EXEC
212 # Elf header lies inside .text
213 self.text_segment.add_content(self.output.header)
215 # Create the four Program Headers. They'll be inside .text
216 # The first Program Header defines .text
217 ph_text = Elf64_Phdr()
218 ph_text.p_type = PT_LOAD
219 ph_text.p_align = 0x100000
220 self.output.add_phdr(ph_text)
221 self.text_segment.add_content(ph_text)
223 # Second one defines .data + .bss
224 ph_data = Elf64_Phdr()
225 ph_data.p_type = PT_LOAD
226 ph_data.p_align = 0x100000
227 self.output.add_phdr(ph_data)
# The header itself is stored in the text segment, like the other three.
228 self.text_segment.add_content(ph_data)
230 # Third one is only there to define the DYNAMIC section
231 ph_dynamic = Elf64_Phdr()
232 ph_dynamic.p_type = PT_DYNAMIC
233 self.output.add_phdr(ph_dynamic)
234 self.text_segment.add_content(ph_dynamic)
236 # Fourth one is for interp
237 ph_interp = Elf64_Phdr()
238 ph_interp.p_type = PT_INTERP
239 self.output.add_phdr(ph_interp)
240 self.text_segment.add_content(ph_interp)
242 # We have all the needed program headers, update ELF header
243 self.output.header.ph_num = len(self.output.phdrs)
245 # Create the actual content for the interpreter section
246 interp = Interpreter()
247 self.text_segment.add_content(interp)
249 # Then the Dynamic section
# NOTE(review): the statement creating `dynamic` is not visible in this view.
251 # for all the requested libs, add a reference in the Dynamic table
252 for lib in self.shlibs:
253 dynamic.add_shlib(lib)
254 # Add an empty symtab, symbol resolution is not done.
255 dynamic.add_symtab(0)
256 # And we need a DT_DEBUG
# NOTE(review): the statement adding the DT_DEBUG entry is not visible here.
259 # This belongs to .data
260 self.data_segment.add_content(dynamic)
261 # The dynamic table links to a string table for the libs' names.
262 self.text_segment.add_content(dynamic.strtab)
264 # We can now add the interesting sections to the corresponding segments
# NOTE(review): the loop header(s) binding `sh` are not visible in this view.
267 # Only ALLOC sections are worth it.
268 # This might require change in the future
# NOTE(review): the body of this test is not visible; presumably it skips
# the section -- confirm.
269 if not (sh.sh_flags & SHF_ALLOC):
272 if (sh.sh_flags & SHF_EXECINSTR):
273 self.text_segment.add_content(sh.content)
274 else: # No exec, it's for .data or .bss
275 if (sh.sh_type == SHT_NOBITS):
276 self.data_segment.add_nobits(sh.content)
# NOTE(review): an `else:` separating the two data-segment calls is
# presumably missing from this view.
278 self.data_segment.add_content(sh.content)
280 # Now, everything is at its place.
281 # Knowing the base address, we can determine where everyone will fall
282 self.output.layout(base_vaddr=0x400000)
284 # Knowing the addresses of all the parts, Program Headers can be filled
285 # This will put the correct p_offset, p_vaddr, p_filesz and p_memsz
286 ph_text.update_from_content(self.text_segment)
287 ph_data.update_from_content(self.data_segment)
288 ph_interp.update_from_content(interp)
289 ph_dynamic.update_from_content(dynamic)
291 # All parts are at their final address, find out the symbols' addresses
# NOTE(review): the loop header binding `i` is not visible in this view.
293 for s in i.global_symbols:
294 # Final address is the section's base address + the symbol's offset
# Each entry is a (section, offset) pair; SHN_ABS in the section position
# means the second element is already the final value.
295 if i.global_symbols[s][0] == SHN_ABS:
296 addr = i.global_symbols[s][1]
# NOTE(review): an `else:` is presumably missing from this view.
298 addr = i.global_symbols[s][0].content.virt_addr
299 addr += i.global_symbols[s][1]
301 self.global_symbols[s] = addr
303 # Resolve the few useful symbols
304 self.global_symbols["_dt_debug"] = dynamic.dt_debug_address
305 self.global_symbols["_DYNAMIC"] = dynamic.virt_addr
# The check below is disabled: leftover undefined symbols are not reported.
307 # For now, it's an error. Later, we could try to find them in the shared
309 #if len(self.undefined_symbols):
310 # raise UndefinedSymbol(self.undefined_symbols.pop())
314 # We can now do the actual relocation
# NOTE(review): the loop header binding `i` is not visible in this view.
316 i.apply_relocation(self.global_symbols)
318 # And update the ELF header with the entry point
319 if not self.entry_point in self.global_symbols:
320 raise UndefinedSymbol(self.entry_point)
321 self.output.header.e_entry = self.global_symbols[self.entry_point]
def toBinArray(self):
  """Return the output image, as produced by self.output.toBinArray()."""
  image = self.output.toBinArray()
  return image
def tofile(self, file_object):
  """Write the output image to file_object.

  The image comes from self.output.toBinArray(); the return value is
  whatever that image's own tofile() returns.
  """
  image = self.output.toBinArray()
  return image.tofile(file_object)