1 # -*- coding: utf-8 -*-
2 # kate: space-indent on; indent-width 2; mixedindent off; indent-mode python;
4 # Copyright (C) 2009 Amand 'alrj' Tihon <amand.tihon@alrj.org>
6 # This file is part of bold, the Byte Optimized Linker.
8 # You can redistribute this file and/or modify it under the terms of the
9 # GNU General Public License as published by the Free Software Foundation,
10 # either version 3 of the License or (at your option) any later version.
13 Main entry point for the bold linker.
16 from constants import *
17 from BinArray import BinArray
18 from elf import Elf64, Elf64_Phdr, Elf64_Shdr, TextSegment, DataSegment
19 from elf import SStrtab, SSymtab, SProgBits, SNobits, Dynamic, Interpreter
21 from ctypes import CDLL
22 from ctypes.util import find_library
27 """Calculate the hash of the function name.
28 @param name: the string to hash
29 @return: 32 bits hash value.
33 h = ((h * 0x21) ^ ord(c)) & 0xffffffff  # h*33, XOR the char code, keep the low 32 bits
37 class BoldLinker(object):
38 """A Linker object takes one or more object files, optional shared libs,
39 and arranges all this in an executable.
# Name of the symbol whose resolved address is written to e_entry by link().
47 self.entry_point = "_start"
# Maps symbol name -> final virtual address (None until link() resolves it).
49 self.global_symbols = {}
# Names referenced by the inputs; names found in global_symbols are removed
# again by build_symbols_tables().
50 self.undefined_symbols = set()
# COMMON symbols, stored as (name, size, alignment) tuples.
51 self.common_symbols = set()
54 def add_object(self, filename):
55 """Add a relocatable object file as input.
56 @param filename: path to the relocatable object file to add
64 def build_symbols_tables(self):
65 """Find out the globally available symbols, as well as the globally
66 undefined ones (which should be found in external libraries)."""
68 # Gather the "extern" and common symbols from each input file.
70 self.undefined_symbols.update(i.undefined_symbols)
71 self.common_symbols.update(i.common_symbols)
73 # Make a dict with all the symbols declared globally.
74 # Key is the symbol name, value will later be set to the final
75 # virtual address. Currently, we're only interested in the declaration.
76 # The virtual addresses are set to None, they'll be resolved later.
78 for s in i.global_symbols:
# A name that is already registered is reported as a redefinition.
79 if s in self.global_symbols:
80 raise RedefinedSymbol(s)
81 self.global_symbols[s] = None
83 # Add a few useful symbols. They'll be resolved later as well.
84 self.global_symbols["_dt_debug"] = None
85 self.global_symbols["_DYNAMIC"] = None
87 # Find out which symbols aren't really defined anywhere
88 self.undefined_symbols.difference_update(self.global_symbols)
90 # A symbol declared as COMMON in one object may very well have been
91 # defined in another. In this case, it will be present in the
# global_symbols dict and is dropped from common_symbols below.
93 # Take a copy because we can't change the set's size inside the loop
94 for i in self.common_symbols.copy():
# i is a (name, size, alignment) tuple, so i[0] is the symbol name.
95 if i[0] in self.global_symbols:
96 self.common_symbols.remove(i)
99 def build_external(self, with_jump=False, align_jump=False):
101 Generate a fake relocatable object, for dynamic linking.
102 This object is then automatically added in the list of objects to link.
103 TODO: This part is extremely non-portable.
106 # Find out all the undefined symbols. They're the ones we'll need to resolve
108 symbols = sorted(list(self.undefined_symbols))
110 # Those three will soon be known...
# (list.remove raises ValueError if any of these names is not in the list.)
111 symbols.remove('_bold__functions_count')
112 symbols.remove('_bold__functions_hash')
113 symbols.remove('_bold__functions_pointers')
115 # Create the fake ELF object.
116 fo = Elf64() # Don't care about most parts of ELF header (?)
117 fo.filename = "Internal dynamic linker"
119 # We need a .data section, a .bss section and possibly a .text section
# .data holds one little-endian 32-bit hash per symbol, in sorted-name order.
120 data_shdr = Elf64_Shdr()
121 data_shdr.sh_type = SHT_PROGBITS
122 data_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC)
123 data_shdr.sh_size = len(symbols) * 4
124 fmt = "<" + "I" * len(symbols)
125 data_shdr.content = BinArray(struct.pack(fmt, *[hash_name(s) for s in symbols]))
126 fo.shdrs.append(data_shdr)
127 fo.sections['.data'] = data_shdr
129 # .bss will contain pointers to resolved external functions, as well as
130 # the COMMON symbols (from C tentative declaration).
# The first len(symbols) * 8 bytes are the pointer slots (8 bytes each).
131 bss_size = len(symbols) * 8
132 for s_name, s_size, s_alignment in self.common_symbols:
# NOTE(review): when bss_size is already a multiple of s_alignment this
# yields a full s_alignment bytes of padding rather than 0. The offset loop
# further down uses the same formula, so size and offsets stay consistent.
133 padding = (s_alignment - (bss_size % s_alignment))
134 bss_size += padding + s_size
136 bss_shdr = Elf64_Shdr()
137 bss_shdr.sh_type = SHT_NOBITS
138 bss_shdr.sh_flags = (SHF_WRITE | SHF_ALLOC)
139 bss_shdr.sh_size = bss_size
140 bss_shdr.content = BinArray("")
141 fo.shdrs.append(bss_shdr)
142 fo.sections['.bss'] = bss_shdr
# .text holds one indirect jmp per symbol; two encodings appear below, an
# 8-byte one (the 6-byte instruction plus two zero bytes) and the bare 6-byte one.
145 text_shdr = Elf64_Shdr()
146 text_shdr.sh_type = SHT_PROGBITS
147 text_shdr.sh_flags = (SHF_ALLOC | SHF_EXECINSTR)
148 text_shdr.sh_size = len(symbols) * 8
150 fmt = '\xff\x25\x00\x00\x00\x00\x00\x00' # ff 25 = jmp [rel label]
153 fmt = '\xff\x25\x00\x00\x00\x00'
155 text_shdr.content = BinArray(fmt * len(symbols))
156 fo.shdrs.append(text_shdr)
157 fo.sections['.text'] = text_shdr
159 # Cheating here. All symbols declared as global so we don't need to create
160 # a symtab from scratch.
# Values are (section header, offset) pairs, or (SHN_ABS, value) for absolutes.
161 fo.global_symbols = {}
162 fo.global_symbols['_bold__functions_count'] = (SHN_ABS, len(symbols))
163 fo.global_symbols['_bold__functions_hash'] = (data_shdr, 0)
164 fo.global_symbols['_bold__functions_pointers'] = (bss_shdr, 0)
166 # The COMMON symbols. Assign an offset in .bss, declare as global.
# Starts right after the pointer slots and mirrors the size computation above.
167 bss_common_offset = len(symbols) * 8
168 for s_name, s_size, s_alignment in self.common_symbols:
169 padding = (s_alignment - (bss_common_offset % s_alignment))
170 bss_common_offset += padding
171 fo.global_symbols[s_name] = (bss_shdr, bss_common_offset)
172 bss_common_offset += s_size
175 for n, i in enumerate(symbols):
176 # The hash is always in .data
177 h = "_bold__hash_%s" % i
178 fo.global_symbols[h] = (data_shdr, n * 4) # Section, offset
181 # the symbol is in .text, can be called directly
182 fo.global_symbols[i] = (text_shdr, n * jmp_size)
183 # another symbol can be used to reference the pointer, just in case.
185 fo.global_symbols[p] = (bss_shdr, n * 8)
188 # The symbol is in .bss, must be called indirectly
189 fo.global_symbols[i] = (bss_shdr, n * 8)
192 # Add relocation entries for the jumps
193 # Relocation will be done for the .text, for every jmp instruction.
195 rela_shdr = Elf64_Shdr()
196 rela_shdr.sh_type = SHT_RELA
197 rela_shdr.target = text_shdr
198 rela_shdr.sh_flags = 0
# NOTE(review): `_content` is assigned here but `content` is read two lines
# below; presumably Elf64_Shdr.content is a property backed by _content - confirm.
199 rela_shdr._content = dummy() # We only need a container for relatab...
200 relatab = [] # Prepare a relatab
201 rela_shdr.content.relatab = relatab
203 for n, i in enumerate(symbols):
204 # Create a relocation entry for each symbol
# +2 skips the two opcode bytes (ff 25) so the offset points at the operand.
206 reloc.r_offset = (n * jmp_size) + 2 # Beginning of the cell to update
208 reloc.r_type = R_X86_64_PC32
209 reloc.symbol = dummy()
210 reloc.symbol.st_shndx = SHN_UNDEF
211 reloc.symbol.name = "_bold__%s" % i
212 relatab.append(reloc)
213 fo.shdrs.append(rela_shdr)
214 fo.sections['.rela.text'] = rela_shdr
216 # Ok, let's add this fake object
def add_shlib(self, libname):
  """Add a shared library to link against.

  @param libname: library name as accepted by ctypes.util.find_library,
                  i.e. without "lib" prefix or suffix (e.g. "c")
  @raise LibNotFound: if find_library cannot locate the library
  """
  # Note : we use ctypes' find_library to find the real name
  fullname = find_library(libname)
  # find_library returns None when nothing matches; only that case is an
  # error. Without this guard the append below could never be reached.
  if not fullname:
    raise LibNotFound(libname)
  self.shlibs.append(fullname)
229 def check_external(self):
230 """Verify that all globally undefined symbols are present in shared
# Load every registered library with ctypes so its exports can be probed.
233 for libname in self.shlibs:
234 libs.append(CDLL(libname))
236 for symbol in self.undefined_symbols:
# NOTE(review): names starting with _bold__ match the symbols generated by
# build_external(); presumably they are exempt from the lookup - confirm.
238 if symbol.startswith('_bold__'):
# ctypes resolves exported symbols on attribute access, so hasattr() tells
# whether this library exports the symbol.
242 if hasattr(lib, symbol):
246 raise UndefinedSymbol(symbol)
250 """Do the actual linking."""
251 # Prepare two segments. One for .text, the other for .data + .bss
252 self.text_segment = TextSegment()
253 # .data will be mapped 0x100000 bytes further
254 self.data_segment = DataSegment(align=0x100000)
255 self.output.add_segment(self.text_segment)
256 self.output.add_segment(self.data_segment)
258 # Adjust the ELF header
259 self.output.header.e_ident.make_default_amd64()
# The program header table is placed directly after the ELF header.
260 self.output.header.e_phoff = self.output.header.size
261 self.output.header.e_type = ET_EXEC
262 # Elf header lies inside .text
263 self.text_segment.add_content(self.output.header)
265 # Create the four Program Headers. They'll be inside .text
266 # The first Program Header defines .text
267 ph_text = Elf64_Phdr()
268 ph_text.p_type = PT_LOAD
269 ph_text.p_align = 0x100000
270 self.output.add_phdr(ph_text)
271 self.text_segment.add_content(ph_text)
273 # Second one defines .data + .bss
# (the header itself is still stored in the text segment, like the others)
274 ph_data = Elf64_Phdr()
275 ph_data.p_type = PT_LOAD
276 ph_data.p_align = 0x100000
277 self.output.add_phdr(ph_data)
278 self.text_segment.add_content(ph_data)
280 # Third one is only there to define the DYNAMIC section
281 ph_dynamic = Elf64_Phdr()
282 ph_dynamic.p_type = PT_DYNAMIC
283 self.output.add_phdr(ph_dynamic)
284 self.text_segment.add_content(ph_dynamic)
286 # Fourth one is for interp
287 ph_interp = Elf64_Phdr()
288 ph_interp.p_type = PT_INTERP
289 self.output.add_phdr(ph_interp)
290 self.text_segment.add_content(ph_interp)
292 # We have all the needed program headers, update ELF header
# NOTE(review): the other header fields used here are e_-prefixed (e_phoff,
# e_type, e_entry); confirm that ph_num is the attribute the header class
# really serializes as the program header count.
293 self.output.header.ph_num = len(self.output.phdrs)
295 # Create the actual content for the interpreter section
296 interp = Interpreter()
297 self.text_segment.add_content(interp)
299 # Then the Dynamic section
301 # for all the requested libs, add a reference in the Dynamic table
302 for lib in self.shlibs:
303 dynamic.add_shlib(lib)
304 # Add an empty symtab, symbol resolution is not done.
305 dynamic.add_symtab(0)
306 # And we need a DT_DEBUG
309 # This belongs to .data
310 self.data_segment.add_content(dynamic)
311 # The dynamic table links to a string table for the libs' names.
312 self.text_segment.add_content(dynamic.strtab)
314 # We can now add the interesting sections to the corresponding segments
317 # Only ALLOC sections are worth it.
318 # This might require changes in the future
319 if not (sh.sh_flags & SHF_ALLOC):
# Executable sections go to the text segment, everything else to data.
322 if (sh.sh_flags & SHF_EXECINSTR):
323 self.text_segment.add_content(sh.content)
324 else: # No exec, it's for .data or .bss
325 if (sh.sh_type == SHT_NOBITS):
326 self.data_segment.add_nobits(sh.content)
328 self.data_segment.add_content(sh.content)
330 # Now, everything is in its place.
331 # Knowing the base address, we can determine where everyone will fall
332 self.output.layout(base_vaddr=0x400000)
334 # Knowing the addresses of all the parts, Program Headers can be filled
335 # This will put the correct p_offset, p_vaddr, p_filesz and p_memsz
336 ph_text.update_from_content(self.text_segment)
337 ph_data.update_from_content(self.data_segment)
338 ph_interp.update_from_content(interp)
339 ph_dynamic.update_from_content(dynamic)
341 # All parts are at their final address, find out the symbols' addresses
# Each entry is a pair: (SHN_ABS, absolute value) or (section header, offset).
343 for s in i.global_symbols:
344 # Final address is the section's base address + the symbol's offset
345 if i.global_symbols[s][0] == SHN_ABS:
346 addr = i.global_symbols[s][1]
348 addr = i.global_symbols[s][0].content.virt_addr
349 addr += i.global_symbols[s][1]
351 self.global_symbols[s] = addr
353 # Resolve the few useful symbols
354 self.global_symbols["_dt_debug"] = dynamic.dt_debug_address
355 self.global_symbols["_DYNAMIC"] = dynamic.virt_addr
357 # We can now do the actual relocation
359 i.apply_relocation(self.global_symbols)
361 # And update the ELF header with the entry point
# An entry symbol that no input defines is reported as undefined.
362 if not self.entry_point in self.global_symbols:
363 raise UndefinedSymbol(self.entry_point)
364 self.output.header.e_entry = self.global_symbols[self.entry_point]
def toBinArray(self):
  """Serialize the linker output.

  @return: the value returned by self.output.toBinArray()
  """
  output = self.output
  return output.toBinArray()
def tofile(self, file_object):
  """Write the serialized linker output to a file.

  @param file_object: passed unchanged to the tofile() method of the array
                      returned by self.output.toBinArray()
  @return: the value returned by that tofile() call
  """
  binary = self.output.toBinArray()
  return binary.tofile(file_object)