class Bio::HMMER::Report
A parser class for a search report by hmmsearch or hmmpfam program in the HMMER package.
Examples¶ ↑
Examples #for multiple reports in a single output file (example.hmmpfam) Bio::HMMER.reports(File.read("example.hmmpfam")) do |report| report.program['name'] report.parameter['HMM file'] report.query_info['Query sequence'] report.hits.each do |hit| hit.accession hit.description hit.score hit.evalue hit.hsps.each do |hsp| hsp.accession hsp.domain hsp.evalue hsp.midline end end
References¶ ↑
Constants
- DELIMITER
Delimiter of each entry for Bio::FlatFile support.
Attributes
statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
statistics by hmmsearch.
Returns an Array of Bio::HMMER::Report::Hsp objects. Under special circumstances, some HSPs do not have parent Hit objects. If you want to access such HSPs, use this method.
A hash contains parameters used. Valid keys are 'HMM file' and 'Sequence file'.
A Hash contains program information used. Valid keys are 'name', 'version', 'copyright' and 'license'.
A hash contains the query information. Valid keys are 'query sequence', 'Accession' and 'Description'.
statistics by hmmsearch. Keys are 'mu', 'lambda', 'chi-sq statistic' and 'P(chi-square)'.
statistics by hmmsearch.
statistics by hmmsearch. Keys are 'Total memory', 'Satisfying E cutoff' and 'Total hits'.
Public Class Methods
Parses a HMMER search report (by hmmpfam or hmmsearch program) and reutrns a Bio::HMMER::Report object.
Examples¶ ↑
hmmpfam_report = Bio::HMMER::Report.new(File.read("hmmpfam.out")) hmmsearch_report = Bio::HMMER::Report.new(File.read("hmmsearch.out"))
# File lib/bio/appl/hmmer/report.rb, line 156 def initialize(data) # The input data is divided into six data fields, i.e. header, # query infomation, hits, HSPs, alignments and search statistics. # However, header and statistics data don't necessarily exist. subdata, is_hmmsearch = get_subdata(data) # if header exists, parse it if subdata["header"] @program, @parameter = parse_header_data(subdata["header"]) else @program, @parameter = [{}, {}] end @query_info = parse_query_info(subdata["query"]) @hits = parse_hit_data(subdata["hit"]) @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch) if @hsps != [] # split alignment subdata into an array of alignments aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1) # append alignment information to corresponding Hsp aln_ary.each_with_index do |aln, i| @hsps[i].set_alignment(aln) end end # assign each Hsp object to its parent Hit hits_hash = {} @hits.each do |hit| hits_hash[hit.accession] = hit end @hsps.each do |hsp| if hits_hash.has_key?(hsp.accession) hits_hash[hsp.accession].append_hsp(hsp) end end # parse statistics (for hmmsearch) if is_hmmsearch @histogram, @statistical_detail, @total_seq_searched, @whole_seq_top_hits, @domain_top_hits = parse_stat_data(subdata["statistics"]) end end
Public Instance Methods
Iterates each hit (Bio::HMMER::Report::Hit).
# File lib/bio/appl/hmmer/report.rb, line 206 def each @hits.each do |hit| yield hit end end
Private Instance Methods
# File lib/bio/appl/hmmer/report.rb, line 215 def get_subdata(data) subdata = {} header_prefix = '\Ahmm(search|pfam) - search' query_prefix = '^Query (HMM|sequence): .*\nAccession: ' hit_prefix = '^Scores for (complete sequences|sequence family)' hsp_prefix = '^Parsed for domains:' aln_prefix = '^Alignments of top-scoring domains:\n' stat_prefix = '^\nHistogram of all scores:' # if header exists, get it if data =~ /#{header_prefix}/ is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m] else is_hmmsearch = false # if no header, assumed to be hmmpfam end # get query, Hit and Hsp data subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m] subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m] subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m] # get alignment data if is_hmmsearch data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m subdata["alignment"] = $1 else data =~ /#{aln_prefix}(.+?)\/\/\n/m subdata["alignment"] = $1 raise "multiple reports found" if $'.length > 0 end # handle -A option of HMMER cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z' subdata["alignment"].sub!(/#{cutoff_line}/, '') # get statistics data subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m] [subdata, is_hmmsearch] end
# File lib/bio/appl/hmmer/report.rb, line 260 def parse_header_data(data) data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m program_data = $1 parameter_data = $2 program = {} program['name'], program['version'], program['copyright'], program['license'] = program_data.split(/\n/) parameter = {} parameter_data.each_line do |x| if /^(.+?):\s+(.*?)\s*$/ =~ x parameter[$1] = $2 end end [program, parameter] end
# File lib/bio/appl/hmmer/report.rb, line 297 def parse_hit_data(data) data.sub!(/.+?---\n/m, '').chop! hits = [] return hits if data == "\t[no hits above thresholds]\n" data.each_line do |l| hits.push(Hit.new(l)) end hits end
# File lib/bio/appl/hmmer/report.rb, line 310 def parse_hsp_data(data, is_hmmsearch) data.sub!(/.+?---\n/m, '').chop! hsps=[] return hsps if data == "\t[no hits above thresholds]\n" data.each_line do |l| hsps.push(Hsp.new(l, is_hmmsearch)) end return hsps end
# File lib/bio/appl/hmmer/report.rb, line 282 def parse_query_info(data) hash = {} data.each_line do |x| if /^(.+?):\s+(.*?)\s*$/ =~ x hash[$1] = $2 elsif /\s+\[(.+)\]/ =~ x hash['comments'] = $1 end end hash end
# File lib/bio/appl/hmmer/report.rb, line 323 def parse_stat_data(data) data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '') histogram = $1.strip statistical_detail = {} data.sub!(/(.+?)\n\n/m, '') $1.each_line do |l| statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l end total_seq_searched = nil data.sub!(/(.+?)\n\n/m, '') $1.each_line do |l| total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l end whole_seq_top_hits = {} data.sub!(/(.+?)\n\n/m, '') $1.each_line do |l| if /^\s*(.+?):\s*(\d+)\s*$/ =~ l whole_seq_top_hits[$1] = $2.to_i elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l whole_seq_top_hits[$1] = $2 end end domain_top_hits = {} data.each_line do |l| if /^\s*(.+?):\s*(\d+)\s*$/ =~ l domain_top_hits[$1] = $2.to_i elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l domain_top_hits[$1] = $2 end end [histogram, statistical_detail, total_seq_searched, whole_seq_top_hits, domain_top_hits] end