class Metasm::ExeFormat
The superclass of all real executable formats. Main methods:
self.decode(str) => decodes the file format (imports/relocs/etc), no asm disassembly; parse(source) => parses assembler source, fills self.source; assemble => assembles self.source into binary sections/segments/whatever; encode => builds imports/relocs tables, puts all this together, links everything in self.encoded
Attributes
cursource: array of Data/Instruction/Align/Padding/Offset/Label, populated in parse
encoded: contains the binary version of the compiled program (EncodedData)
unique_labels_cache: hash of labels generated by #new_label
Public Class Methods
creates a new object using the specified cpu, parses the asm source, and assembles it
# File metasm/exe_format/main.rb, line 91
# Builds a new instance for +cpu+ and assembles +source+ into it.
# The first two arguments may be given in either order: when +source+ is a
# CPU, the two are swapped. +file+ and +lineno+ are forwarded unchanged to
# the instance-level #assemble.
# Returns the new instance.
def self.assemble(cpu, source, file='<unk>', lineno=1)
  if source.kind_of?(CPU)
    cpu, source = source, cpu
  end
  exe = new(cpu)
  exe.assemble(source, file, lineno)
  exe
end
same as assemble, reads asm source from the specified file
# File metasm/exe_format/main.rb, line 99
# Same as ::assemble, with the asm source read from +filename+.
# The two arguments may be given in either order: when +filename+ is a CPU,
# they are swapped. The file name is also used as the source name, and
# line numbering starts at 1.
def self.assemble_file(cpu, filename)
  if filename.kind_of?(CPU)
    cpu, filename = filename, cpu
  end
  asm_text = File.read(filename)
  assemble(cpu, asm_text, filename, 1)
end
same as load, used by AutoExe
# File metasm/exe_format/main.rb, line 26
# Alias-like entry point: forwards every argument and the block to ::load
# and returns its result.
def self.autoexe_load(*x, &b)
  load(*x, &b)
end
creates a new object using the specified cpu, parse/compile/assemble the C source
# File metasm/exe_format/main.rb, line 117
# Builds a new instance for +cpu+ and runs the instance-level #compile_c
# on the C +source+. The first two arguments may be given in either order:
# when +source+ is a CPU, they are swapped.
# Returns the new instance.
def self.compile_c(cpu, source, file='<unk>', lineno=1)
  if source.kind_of?(CPU)
    cpu, source = source, cpu
  end
  exe = new(cpu)
  exe.compile_c(source, file, lineno)
  exe
end
# File metasm/exe_format/main.rb, line 124
# Same as ::compile_c, with the C source read from +filename+.
# The two arguments may be given in either order: when +filename+ is a CPU,
# they are swapped. The file name doubles as the source name; line
# numbering starts at 1.
def self.compile_c_file(cpu, filename)
  if filename.kind_of?(CPU)
    cpu, filename = filename, cpu
  end
  c_text = File.read(filename)
  compile_c(cpu, c_text, filename, 1)
end
# File metasm/exe_format/main.rb, line 54
# Loads +raw+ through ::load (extra arguments and block are forwarded),
# calls #decode on the resulting object and returns that object.
def self.decode(raw, *a, &b)
  exe = load(raw, *a, &b)
  exe.decode
  exe
end
calls load_file, then decode
(decode is skipped when the loaded object already has a @disassembler instance variable)
# File metasm/exe_format/main.rb, line 41
# Loads +path+ through ::load_file (extra arguments and block are
# forwarded) and decodes the result, unless the loaded object already
# carries a @disassembler instance variable.
# Returns the loaded object.
def self.decode_file(path, *a, &b)
  exe = load_file(path, *a, &b)
  # compare as strings: instance_variables yields symbols or strings
  # depending on the ruby version
  has_dasm = exe.instance_variables.any? { |iv| iv.to_s == "@disassembler" }
  exe.decode unless has_dasm
  exe
end
calls load_file,
then decode_header
# File metasm/exe_format/main.rb, line 48
# Loads +path+ through ::load_file (extra arguments and block are
# forwarded), calls #decode_header on the result and returns it.
def self.decode_file_header(path, *a, &b)
  exe = load_file(path, *a, &b)
  exe.decode_header
  exe
end
# File metasm/exe_format/main.rb, line 60
# Loads +raw+ through ::load (extra arguments and block are forwarded),
# calls #decode_header on the result and returns it.
def self.decode_header(raw, *a, &b)
  exe = load(raw, *a, &b)
  exe.decode_header
  exe
end
creates a new instance, populates self.encoded with the supplied string
# File metasm/exe_format/main.rb, line 17
# Creates a new instance (extra arguments and block go to ::new) and fills
# its +encoded+ with +str+: an EncodedData replaces +encoded+ outright,
# anything else is appended to the existing +encoded+.
# Returns the new instance.
def self.load(str, *a, &b)
  exe = new(*a, &b)
  case str
  when EncodedData
    exe.encoded = str
  else
    exe.encoded << str
  end
  exe
end
same as load, but reads the content from a file;
uses VirtualFile if available
# File metasm/exe_format/main.rb, line 34
# Reads +path+ with VirtualFile.read and hands the content to ::load
# (extra arguments and block are forwarded). Records +path+ as the
# object's filename unless one is already set.
# Returns the loaded object.
def self.load_file(path, *a, &b)
  content = VirtualFile.read(path)
  exe = load(content, *a, &b)
  exe.filename ||= path
  exe
end
initializes self.cpu, creates an empty self.encoded
# File metasm/main.rb, line 287
# Stores +cpu+ (may be nil), and starts with an empty EncodedData and an
# empty cache of already-issued label names.
def initialize(cpu=nil)
  @unique_labels_cache = {}
  @encoded = EncodedData.new
  @cpu = cpu
end
Public Instance Methods
returns the file offset corresponding to a mapped address (identity mapping in this base class)
# File metasm/exe_format/main.rb, line 209
# Converts a mapped address to a file offset.
# This base implementation is the identity: +addr+ is returned as-is.
def addr_to_fileoff(addr)
  addr
end
chooses among multiple possible sub-EncodedData; assumes all ambiguous edata have equivalent relocations in the same order
# File metasm/encode.rb, line 47
# Resolves an array mixing EncodedData, Arrays of alternative EncodedData
# (ambiguous encodings), Align, Padding and Offset directives into a single
# EncodedData.
#
# Works in three passes over +ary+:
#  1. compute, for every exported label, its smallest and largest possible
#     offset from a fresh 'section_start' label (minbinding / maxbinding)
#  2. replace each Array of alternatives by one candidate (ary is modified
#     in place through map!)
#  3. concatenate everything, materializing Align/Padding filler and
#     checking each Offset directive
#
# Raises EncodeError when no alternative is acceptable, when an .offset
# cannot be enforced or when a .pad has no room; raises through the
# offending directive object (raise elem, ...) for malformed .pad/.offset
# layouts; raises a plain RuntimeError on an unexpected element.
# Returns the assembled EncodedData.
def assemble_resolve(ary)
  startlabel = new_label('section_start')

  # create two bindings where all elements are the shortest/longest possible
  minbinding = {}
  minoff = 0
  maxbinding = {}
  maxoff = 0

  ary.each { |elem|
    case elem
    when Array
      # alternatives without relocations: only the shortest one matters
      # (this rebinds the block-local elem, ary itself is untouched here)
      if elem.all? { |ed| ed.kind_of? EncodedData and ed.reloc.empty? }
        elem = [elem.sort_by { |ed| ed.length }.first]
      end
      elem.each { |e|
        e.export.each { |label, off|
          minbinding[label] = Expression[startlabel, :+, minoff + off]
          maxbinding[label] = Expression[startlabel, :+, maxoff + off]
        }
      }
      minoff += elem.map { |e| e.virtsize }.min
      maxoff += elem.map { |e| e.virtsize }.max

    when EncodedData
      elem.export.each { |label, off|
        minbinding[label] = Expression[startlabel, :+, minoff + off]
        maxbinding[label] = Expression[startlabel, :+, maxoff + off]
      }
      minoff += elem.virtsize
      maxoff += elem.virtsize

    when Align
      # an alignment adds between 0 and val-1 bytes
      minoff += 0
      maxoff += elem.val - 1

    when Padding
      # find the surrounding Offsets and compute the largest/shortest edata sizes to determine min/max length for the padding
      prevoff = ary[0..ary.index(elem)].grep(Offset).last
      nextoff = ary[ary.index(elem)..-1].grep(Offset).first
      raise elem, 'need .offset after .pad' if not nextoff

      # find all elements between the surrounding Offsets
      previdx = prevoff ? ary.index(prevoff) + 1 : 0
      surround = ary[previdx..ary.index(nextoff)-1]
      surround.delete elem
      if surround.find { |nelem| nelem.kind_of? Padding }
        raise elem, 'need .offset beetween two .pad'
      end
      if surround.find { |nelem| nelem.kind_of? Align and ary.index(nelem) > ary.index(elem) }
        raise elem, 'cannot .align after a .pad' # XXX really ?
      end

      # lenmin/lenmax are the extreme lengths of the Padding
      nxt = Expression[nextoff.val]
      ext = nxt.externals
      raise elem, "bad offset #{nxt}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first])
      # make the offset relative to startlabel when it does not already
      # reduce to an Integer under minbinding
      nxt = Expression[nxt, :-, startlabel] if not nxt.bind(minbinding).reduce.kind_of? ::Integer
      prv = Expression[prevoff ? prevoff.val : 0]
      ext = prv.externals
      raise elem, "bad offset #{prv}" if ext.length > 1 or (ext.length == 1 and not minbinding[ext.first])
      prv = Expression[prv, :-, startlabel] if not prv.bind(minbinding).reduce.kind_of? ::Integer

      lenmin = Expression[nxt.bind(minbinding), :-, prv.bind(maxbinding)].reduce
      lenmax = Expression[nxt.bind(maxbinding), :-, prv.bind(minbinding)].reduce
      raise elem, "bad labels: #{lenmin}" if not lenmin.kind_of? ::Integer or not lenmax.kind_of? ::Integer
      # subtract the space taken by the other elements between the two Offsets
      surround.each { |nelem|
        case nelem
        when Array
          lenmin -= nelem.map { |e| e.virtsize }.max
          lenmax -= nelem.map { |e| e.virtsize }.min
        when EncodedData
          lenmin -= nelem.virtsize
          lenmax -= nelem.virtsize
        when Align
          lenmin -= nelem.val - 1
          lenmax -= 0
        end
      }
      raise elem, "no room for .pad before '.offset #{nextoff.val}' at #{Backtrace.backtrace_str(nextoff.backtrace)}, need at least #{-lenmax} more bytes" if lenmax < 0
      minoff += [lenmin, 0].max
      maxoff += lenmax

    when Offset
      # nothing to do for now
    else
      raise "Internal error: bad object #{elem.inspect} in encode_resolve"
    end
  }

  # checks an expression linearity
  # returns true when, after peeling numeric operands off the listed
  # operators, what remains is not an Expression
  check_linear = lambda { |expr|
    expr = expr.reduce if expr.kind_of? Expression
    while expr.kind_of? Expression
      case expr.op
      when :*
        if expr.lexpr.kind_of? Numeric; expr = expr.rexpr
        elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr
        else break
        end
      when :/, :>>, :<<
        if expr.rexpr.kind_of? Numeric; expr = expr.lexpr
        else break
        end
      when :+, :-
        if not expr.lexpr; expr = expr.rexpr
        elsif expr.lexpr.kind_of? Numeric; expr = expr.rexpr
        elsif expr.rexpr.kind_of? Numeric; expr = expr.lexpr
        else
          # both sides are non-numeric: check the right side recursively,
          # keep iterating on the left side
          break if not check_linear[expr.rexpr]
          expr = expr.lexpr
        end
      else break
      end
    end

    not expr.kind_of? Expression
  }

  # now we can resolve all relocations
  # for linear expressions of internal variables (ie differences of labels from the ary):
  # - calc target numeric bounds, and reject relocs not accepting worst case value
  # - else reject all but largest place available
  # then choose the shortest overall EData left
  ary.map! { |elem|
    case elem
    when Array
      # for each external, compute numeric target values using minbinding[external] and maxbinding[external]
      # this gives us all extreme values for linear expressions
      target_bounds = {}
      # binds each external to its min then its max value, recursing over
      # the remaining externals, and records every reduced target
      rec_checkminmax = lambda { |idx, target, binding, extlist|
        if extlist.empty?
          (target_bounds[idx] ||= []) << target.bind(binding).reduce
        else
          rec_checkminmax[idx, target, binding.merge(extlist.last => minbinding[extlist.last]), extlist[0...-1]]
          rec_checkminmax[idx, target, binding.merge(extlist.last => maxbinding[extlist.last]), extlist[0...-1]]
        end
      }
      # biggest size available for this relocation (for non-linear/external)
      wantsize = {}

      elem.each { |e|
        e.reloc.sort.each_with_index { |r_, i|
          r = r_[1]
          # has external ref
          if not r.target.bind(minbinding).reduce.kind_of?(Numeric) or not check_linear[r.target]
            # find the biggest relocation type for the current target
            wantsize[i] = elem.map { |edata|
              edata.reloc.sort[i][1].type
            }.sort_by { |type| Expression::INT_SIZE[type] }.last # XXX do not use rel.length
          else
            rec_checkminmax[i, r.target, {}, r.target.externals]
          end
        }
      }

      # reject candidates with reloc type too small
      acceptable = elem.find_all { |edata|
        r = edata.reloc.sort
        (0...r.length).all? { |i|
          if wantsize[i]
            r[i][1].type == wantsize[i]
          else
            target_bounds[i].all? { |b| Expression.in_range?(b, r[i][1].type) }
          end
        }
      }

      raise EncodeError, "cannot find candidate in #{elem.inspect}, immediate too big #{wantsize.inspect} #{target_bounds.inspect}" if acceptable.empty?

      # keep the shortest
      acceptable.sort_by { |edata| edata.virtsize }.first
    else
      elem
    end
  }

  # assemble all parts, resolve padding sizes, check offset directives
  edata = EncodedData.new

  # fills edata with repetitions of data until targetsize
  # when data is nil, only edata.virtsize is set to targetsize
  fillwith = lambda { |targetsize, data|
    if data
      if data.reloc.empty? and not data.data.empty?
        # avoid useless iterations
        nr = (targetsize-edata.virtsize) / data.length - 1
        if nr > 0
          dat = data.data.ljust(data.virtsize, 0.chr)
          edata << (dat * nr)
        end
      end
      while edata.virtsize + data.virtsize <= targetsize
        edata << data
      end
      # finish with a truncated copy of the pattern if needed
      if edata.virtsize < targetsize
        edata << data[0, targetsize - edata.virtsize]
      end
    else
      edata.virtsize = targetsize
    end
  }

  ary.each { |elem|
    case elem
    when EncodedData
      edata << elem
    when Align
      fillwith[EncodedData.align_size(edata.virtsize, elem.val), elem.fillwith]
    when Offset
      raise EncodeError, "could not enforce .offset #{elem.val} #{elem.backtrace}: offset now #{edata.virtsize}" if edata.virtsize != Expression[elem.val].bind(edata.binding(0)).reduce
    when Padding
      # pad up to the next Offset, minus the size of whatever sits between
      # this Padding and that Offset
      nextoff = ary[ary.index(elem)..-1].grep(Offset).first
      targetsize = Expression[nextoff.val].bind(edata.binding(0)).reduce
      ary[ary.index(elem)+1..ary.index(nextoff)-1].each { |nelem| targetsize -= nelem.virtsize }
      raise EncodeError, "no room for .pad #{elem.backtrace_str} before .offset #{nextoff.val}, would be #{targetsize-edata.length} bytes long" if targetsize < edata.length
      fillwith[targetsize, elem.fillwith]
    else raise "Internal error: #{elem.inspect}"
    end
  }

  edata
end
encodes an Array of source (Label/Data/Instruction etc) to an EncodedData; resolves ambiguities using
assemble_resolve
# File metasm/encode.rb, line 13
# Encodes the source elements of +seq+ for +cpu+ into one EncodedData.
#
# Labels become exports of the current chunk; Data and unambiguous
# instructions are appended to it. Align/Padding/Offset directives and
# instructions with several candidate encodings are queued as separate
# entries, each followed by a fresh empty chunk; when any such entry
# exists the whole list goes through #assemble_resolve.
# Raises EncodeError when the cpu returns no encoding for an instruction.
# Returns the EncodedData after applying fixup with its own binding.
def assemble_sequence(seq, cpu)
  # list of edata / directives / arrays of ambiguous edata;
  # its last element is always an edata
  parts = [EncodedData.new]

  seq.each do |src|
    case src
    when Label
      parts.last.add_export(src.name, parts.last.virtsize)
    when Data
      parts.last << src.encode(cpu.endianness)
    when Align, Padding
      # the fill pattern must be encoded before the sizes get resolved
      if src.fillwith and not src.fillwith.kind_of? EncodedData
        src.fillwith = src.fillwith.encode(cpu.endianness)
      end
      parts << src << EncodedData.new
    when Offset
      parts << src << EncodedData.new
    when Instruction
      encoded = cpu.encode_instruction(self, src)
      if encoded.kind_of?(Array)
        if encoded.length == 0
          raise EncodeError, "failed to encode #{src}"
        elsif encoded.length == 1
          parts.last << encoded.first
        else
          # several candidates: to solve later
          parts << encoded << EncodedData.new
        end
      else
        parts.last << encoded
      end
    end
  end

  result = if parts.length > 1
    assemble_resolve(parts)
  else
    parts.shift
  end
  result.fixup result.binding
  result
end
should setup a default entrypoint for C code, including preparing args for main() etc
# File metasm/exe_format/main.rb, line 148
# Hook called at the end of #compile_c.
# This base implementation does nothing and returns nil.
def c_set_default_entrypoint
end
parses a bunch of standalone C code, compile and assemble it
# File metasm/exe_format/main.rb, line 105
# Compiles standalone C +source+ and assembles the result into this object:
# a C parser from the cpu is tuned with #tune_cparser and fed the source,
# #read_c_attrs is called when the object defines it, the cpu's C compiler
# produces asm text (printed when $DEBUG is set) which is passed to
# #assemble, and finally #c_set_default_entrypoint is invoked.
# Returns whatever #c_set_default_entrypoint returns.
def compile_c(source, file='<unk>', lineno=1)
  parser = @cpu.new_cparser
  tune_cparser(parser)
  parser.parse(source, file, lineno)
  read_c_attrs(parser) if respond_to?(:read_c_attrs)

  compiler = @cpu.new_ccompiler(parser, self)
  asm_text = compiler.compile
  puts asm_text if $DEBUG

  assemble(asm_text, 'C compiler output', 1)
  c_set_default_entrypoint
end
adds to the assembler source src
a directive that changes the current assembler section
# File metasm/exe_format/main.rb, line 130
# Appends +section+ to +src+ using <<.
# Returns the result of that append.
def compile_setsection(src, section)
  src << section
end
# File metasm/main.rb, line 294
# Returns the cpu of this object. When none is set yet, it is obtained from
# #cpu_from_headers and remembered.
def cpu
  return @cpu if @cpu
  @cpu = cpu_from_headers
end
# File metasm/exe_format/serialstruct.rb, line 291
# Returns the EncodedData currently being decoded; here, simply #encoded.
def curencoded
  encoded
end
# File metasm/exe_format/main.rb, line 78
# Loads the content of +path+ into this object with #load_file, then runs
# #decode. Returns self.
def decode_file(path)
  load_file(path)
  decode

  self
end
# File metasm/exe_format/main.rb, line 84
# Loads the content of +path+ into this object with #load_file, then runs
# #decode_header. Returns self.
def decode_file_header(path)
  load_file(path)
  decode_header

  self
end
# File metasm/exe_format/serialstruct.rb, line 292
# Reads a NUL-terminated string from +ed+ (defaults to #curencoded),
# starting at ed.ptr. The terminator is consumed by the read and dropped
# from the result.
# Returns '' without reading anything when no NUL is found at or after
# ed.ptr.
def decode_strz(ed = curencoded)
  nul_at = ed.data.index(?\0, ed.ptr)
  return '' unless nul_at

  # read up to and including the terminator, then strip it
  ed.read(nul_at - ed.ptr + 1).chop
end
disassembles the specified entrypoints; initializes the disassembler if needed; uses #get_default_entrypoints if the argument list is empty; returns the disassembler
# File metasm/exe_format/main.rb, line 172
# Runs the disassembler (obtained through #disassembler) on +entrypoints+,
# or on #get_default_entrypoints when none are given.
# Returns @disassembler.
def disassemble(*entrypoints)
  targets = entrypoints.empty? ? get_default_entrypoints : entrypoints
  disassembler.disassemble(*targets)
  @disassembler
end
disassembles the specified entrypoints without backtracking; initializes the disassembler if needed; uses #get_default_entrypoints if the argument list is empty; returns the disassembler
# File metasm/exe_format/main.rb, line 182
# Same as #disassemble, but calls the disassembler's disassemble_fast_deep.
# Uses #get_default_entrypoints when no entrypoint is given.
# Returns @disassembler.
def disassemble_fast_deep(*entrypoints)
  targets = entrypoints.empty? ? get_default_entrypoints : entrypoints
  disassembler.disassemble_fast_deep(*targets)
  @disassembler
end
# File metasm/exe_format/main.rb, line 152
# Returns the disassembler for this object; on first use it is built by
# #init_disassembler and remembered.
def disassembler
  return @disassembler if @disassembler
  @disassembler = init_disassembler
end
returns a string containing asm-style section declaration
# File metasm/disassemble.rb, line 353
# Builds the comment line announcing a section in an asm dump: a newline
# followed by "// section at " and +addr+ rendered through Expression[].
# +edata+ is not used by this base implementation.
def dump_section_header(addr, edata)
  where = Expression[addr]
  "\n// section at #{where}"
end
saves the result of encode_string
in the specified file;
overwrites existing files
# File metasm/exe_format/main.rb, line 203
# Encodes the executable through #encode_string (extra arguments are
# forwarded), then writes @encoded.data to +path+. The file is opened in
# 'wb' mode, so an existing file is truncated.
def encode_file(path, *a)
  encode_string(*a)
  File.open(path, 'wb') do |out|
    out.write(@encoded.data)
  end
end
encodes the executable as a string, checks that all relocations are resolved, and returns the raw string version
# File metasm/exe_format/main.rb, line 195
# Runs #encode (arguments are forwarded) and returns @encoded.data.
# Raises a RuntimeError listing every remaining relocation target (with its
# backtrace string when the relocation has one) if @encoded still holds
# relocations after encoding.
def encode_string(*a)
  encode(*a)

  unless @encoded.reloc.empty?
    pending = @encoded.reloc.map { |o, r|
      where = r.backtrace ? Backtrace.backtrace_str(r.backtrace) : nil
      "#{r.target} " + where.to_s
    }
    raise ["Unresolved relocations:", pending].join("\n")
  end

  @encoded.data
end
returns the address at which a given file offset would be mapped (identity mapping in this base class)
# File metasm/exe_format/main.rb, line 214
# Converts a file offset to a mapped address.
# This base implementation is the identity: +foff+ is returned as-is.
def fileoff_to_addr(foff)
  foff
end
returns a list of entrypoints to disassemble (program entrypoint, exported functions…)
# File metasm/exe_format/main.rb, line 189
# Entrypoints used by #disassemble / #disassemble_fast_deep when called
# without argument. This base implementation knows none and returns a new
# empty Array.
def get_default_entrypoints
  []
end
returns an array of [type, expression, length] that may be accessed by this instruction (type is :r/:w, len is in bytes)
# File metasm/disassemble.rb, line 361
# Delegates to the cpu: returns @cpu.get_xrefs_rw(dasm, di) unchanged.
def get_xrefs_rw(dasm, di)
  @cpu.get_xrefs_rw(dasm, di)
end
returns an array of expressions that may be executed by this instruction
# File metasm/disassemble.rb, line 358
# Delegates to the cpu: returns @cpu.get_xrefs_x(dasm, di) unchanged.
def get_xrefs_x(dasm, di)
  @cpu.get_xrefs_x(dasm, di)
end
returns the exe disassembler; if it does not exist, creates one and feeds it with the exe sections
# File metasm/exe_format/main.rb, line 158
# Makes sure @disassembler exists (creating a Disassembler bound to self if
# needed), gives it this object's cpu when it has none, and registers every
# section yielded by #each_section. A section whose data is nil is
# registered as an empty EncodedData.
# Returns @disassembler.
def init_disassembler
  @disassembler = Disassembler.new(self) unless @disassembler
  @disassembler.cpu ||= cpu

  each_section do |section, base|
    section ||= EncodedData.new
    @disassembler.add_section(section, base)
  end

  @disassembler
end
return the label name corresponding to the specified offset of the encodeddata, creates it if necessary
# File metasm/main.rb, line 299
# Returns the name of the label exported by +edata+ at +offset+.
# When there is none, a new label derived from +base+ is generated with
# #new_label, exported at that offset, and returned.
def label_at(edata, offset, base = '')
  existing = edata.inv_export[offset]
  return existing if existing

  fresh = new_label(base)
  edata.add_export(fresh, offset)
  fresh
end
# File metasm/exe_format/main.rb, line 66
# Fills this object's +encoded+ with +str+: an EncodedData replaces
# @encoded outright, anything else is appended to the current @encoded.
# Returns self.
def load(str)
  case str
  when EncodedData
    @encoded = str
  else
    @encoded << str
  end
  self
end
# File metasm/exe_format/main.rb, line 73
# Records +path+ as @filename (unless one is already set), reads the file
# through VirtualFile.read and passes the content to #load.
# Returns the result of #load.
def load_file(path)
  @filename = path unless @filename
  content = VirtualFile.read(path)
  load(content)
end
hash mapping local anonymous label number => unique name; defined only while parsing. Usage:
    jmp 1f
  1:
    jmp 1f
    jmp 1b
  1:
defined in parse, replaced in use by Object#parse; no macro-scope (macros are gsub-like, and no special handling for those labels is done)
# File metasm/parse.rb, line 275
# Looks up the unique name recorded for the backward anonymous label +id+
# (the "1b" form). Returns nil when +id+ is not in the table.
def locallabels_bkw(id)
  @locallabels_bkw[id]
end
# File metasm/parse.rb, line 278
# Returns the unique name reserved for the forward anonymous label +id+
# (the "1f" form). The name is created with new_label("local_<id>") on
# first request and remembered for later requests.
def locallabels_fwd(id)
  pending = @locallabels_fwd[id]
  return pending if pending

  @locallabels_fwd[id] = new_label("local_#{id}")
end
creates a new label, that is guaranteed to never be returned again as long as this object (ExeFormat) exists
# File metasm/main.rb, line 307
# Generates a label name from +base+ and records it in
# @unique_labels_cache.
#
# Every character of +base+ outside a-z, A-Z, 0-9 and _ is replaced by an
# underscore. When the sanitized name is empty or already recorded, a
# '_uuid' suffix followed by the hexadecimal object_id of the name string
# is appended, and the resulting string is frozen.
# +base+ itself is never modified.
def new_label(base = '')
  name = base.dup.tr('^a-zA-Z0-9_', '_')

  if name.empty? || @unique_labels_cache[name]
    # use %x instead of to_s(16) for negative values
    name << '_uuid' << ('%08x' % name.object_id)
    name.freeze
  end

  @unique_labels_cache[name] = true
  name
end
parses an asm source file to an array of Instruction/Data/Align/Offset/Padding
# File metasm/parse.rb, line 283 def parse(text, file='<ruby>', lineno=0) parse_init @lexer ||= cpu.new_asmprepro('', self) @lexer.feed text, file, lineno lasteol = true while not @lexer.eos? tok = @lexer.readtok next if not tok case tok.type when :space when :eol lasteol = true when :punct case tok.raw when '.' tok = tok.dup while ntok = @lexer.nexttok and ((ntok.type == :string) or (ntok.type == :punct and ntok.raw == '.')) tok.raw << @lexer.readtok.raw end parse_parser_instruction tok else raise tok, 'syntax error' end lasteol = false when :string ntok = nntok = nil if lasteol and ((ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ':') or (ntok and ntok.type == :space and nntok = @lexer.nexttok and nntok.type == :string and Data::DataSpec.include?(nntok.raw))) if tok.raw =~ /^[1-9][0-9]*$/ # handle anonymous local labels lname = @locallabels_bkw[tok.raw] = @locallabels_fwd.delete(tok.raw) || new_label('local_'+tok.raw) else lname = tok.raw raise tok, "invalid label name: #{lname.inspect} is reserved" if @cpu.check_reserved_name(lname) raise tok, "label redefinition" if new_label(lname) != lname end l = Label.new(lname) l.backtrace = tok.backtrace @cursource << l lasteol = false else lasteol = false @lexer.unreadtok ntok @lexer.unreadtok tok if Data::DataSpec.include?(tok.raw) @cursource << parse_data else @cursource << @cpu.parse_instruction(@lexer) end if lname = @locallabels_fwd.delete('endinstr') l = Label.new(lname) l.backtrace = tok.backtrace @cursource << l end end else raise tok, 'syntax error' end end puts "Undefined forward reference to anonymous labels #{@locallabels_fwd.keys.inspect}" if $VERBOSE and not @locallabels_fwd.empty? self end
# File metasm/parse.rb, line 428
# Parses a data statement from the lexer: a data type keyword (a member of
# Data::DataSpec) followed by one or more comma-separated items, each read
# by #parse_data_data. Line breaks are accepted after the keyword and after
# each comma.
# Raises ParseError when the lexer has no token left, and raises through
# the token when it is not a known data type.
# Returns a Data of that type holding the array of items, with count 1.
def parse_data
  tok = @lexer.readtok
  raise ParseError, 'internal error' if not tok

  known_type = (tok.type == :string && Data::DataSpec.include?(tok.raw))
  raise tok, 'invalid data type' if not known_type

  type = tok.raw
  @lexer.skip_space_eol

  items = []
  loop do
    items << parse_data_data(type)
    @lexer.skip_space
    sep = @lexer.readtok
    if sep && sep.type == :punct && sep.raw == ','
      @lexer.skip_space_eol
    else
      # not a separator: give it back and stop
      @lexer.unreadtok sep
      break
    end
  end

  Data.new(type, items, 1, tok.backtrace)
end
# File metasm/parse.rb, line 447 def parse_data_data(type) raise ParseError, 'need data content' if not tok = @lexer.readtok if tok.type == :punct and tok.raw == '?' Data.new type, :uninitialized, 1, tok.backtrace elsif tok.type == :quoted Data.new type, tok.value, 1, tok.backtrace else @lexer.unreadtok tok raise tok, 'invalid data' if not i = Expression.parse(@lexer) @lexer.skip_space if ntok = @lexer.readtok and ntok.type == :string and ntok.raw.downcase == 'dup' raise ntok, 'need immediate count expression' unless (count = i.reduce).kind_of? ::Integer @lexer.skip_space raise ntok, 'syntax error, ( expected' if not ntok = @lexer.readtok or ntok.type != :punct or ntok.raw != '(' content = [] loop do content << parse_data_data(type) @lexer.skip_space if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ',' @lexer.skip_space_eol else @lexer.unreadtok ntok break end end raise ntok, 'syntax error, ) expected' if not ntok = @lexer.readtok or ntok.type != :punct or ntok.raw != ')' Data.new type, content, count, tok.backtrace else @lexer.unreadtok ntok Data.new type, i, 1, tok.backtrace end end end
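parser initialization hook: set up self.cursource here (this base implementation only creates the anonymous local label tables)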
# File metasm/parse.rb, line 260
# Makes sure the two anonymous local label tables exist (backward and
# forward references); tables that already exist are left untouched.
# This implementation does not assign @cursource.
# Returns the forward table.
def parse_init
  @locallabels_bkw = {} unless @locallabels_bkw
  @locallabels_fwd ||= {}
end
creates a new label from base and parses it (including optional additional src); returns the new label name
# File metasm/parse.rb, line 350
# Generates a new label from +base+, appends the matching Label to
# @cursource, then parses +src+.
# Note that #parse is called with +src+ even when it is left to its nil
# default.
# Returns the name of the new label.
def parse_new_label(base='', src=nil)
  parse_init
  name = new_label(base)
  marker = Label.new(name)
  @cursource << marker
  parse(src)
  name
end
handles special directives (alignment, changing section, …); special directives start with a dot
# File metasm/parse.rb, line 360
# Handles one assembler directive; +tok+ is the token holding the full
# directive name (leading dot included), compared case-insensitively.
#
#   .align <size> [, <fill>]   appends an Align; size must reduce to an Integer
#   .pad [<fill>]              appends a Padding
#   .offset <expr>             appends an Offset
#   .padto <expr> [, <fill>]   appends a Padding followed by an Offset
#
# <fill> is either a bare value (parsed as 'db' data) or a data type
# keyword followed by a value. Any other directive is forwarded to
# @cpu.parse_parser_instruction.
# Errors are raised through the directive token, so they carry its
# position.
def parse_parser_instruction(tok)
  case tok.raw.downcase
  when '.align'
    e = Expression.parse(@lexer).reduce
    # raise through the token like every other error in this method:
    # the receiver itself is not an exception class/object
    raise tok, 'need immediate alignment size' unless e.kind_of? ::Integer
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
      @lexer.skip_space_eol
      # allow single byte value or full data statement
      if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
        @lexer.unreadtok ntok
        type = 'db'
      else
        type = ntok.raw
      end
      fillwith = parse_data_data type
    else
      @lexer.unreadtok ntok
    end
    # nothing else is allowed on the line
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Align.new(e, fillwith, tok.backtrace)

  when '.pad'
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type != :eol
      # allow single byte value or full data statement
      if not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
        @lexer.unreadtok ntok
        type = 'db'
      else
        type = ntok.raw
      end
      fillwith = parse_data_data(type)
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Padding.new(fillwith, tok.backtrace)

  when '.offset'
    e = Expression.parse(@lexer)
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    @cursource << Offset.new(e, tok.backtrace)

  when '.padto'
    e = Expression.parse(@lexer)
    @lexer.skip_space
    if ntok = @lexer.readtok and ntok.type == :punct and ntok.raw == ','
      @lexer.skip_space
      # allow single byte value or full data statement
      if not ntok = @lexer.readtok or not ntok.type == :string or not Data::DataSpec.include?(ntok.raw)
        @lexer.unreadtok ntok
        type = 'db'
      else
        type = ntok.raw
      end
      fillwith = parse_data_data type
    else
      @lexer.unreadtok ntok
    end
    raise tok, 'syntax error' if ntok = @lexer.nexttok and ntok.type != :eol
    # equivalent to a .pad immediately followed by a .offset
    @cursource << Padding.new(fillwith, tok.backtrace) << Offset.new(e, tok.backtrace)

  else
    @cpu.parse_parser_instruction(self, tok)
  end
end
# File metasm/exe_format/main.rb, line 218
# Returns the lowercased class name of this object without its namespace
# (the part after the last '::').
def shortname
  full_name = self.class.name
  full_name.split('::').last.downcase
end
prepare a cparser
# File metasm/exe_format/main.rb, line 139
# Prepares the C parser +cp+: its lexer is handed to #tune_prepro.
# Returns whatever #tune_prepro returns.
def tune_cparser(cp)
  prepro = cp.lexer
  tune_prepro(prepro)
end
prepare a preprocessor before it reads any source, should define macros to identify the fileformat
# File metasm/exe_format/main.rb, line 135
# Hook receiving the preprocessor +l+ before it reads any source.
# This base implementation does nothing and returns nil.
def tune_prepro(l)
end