class Bio::Blast::Default::Report::HSP

Bio::Blast::Default::Report::HSP holds information about the hsp (high-scoring segment pair).

Attributes

align_len[R]

aligned length

bit_score[R]

bit score

evalue[R]

e-value

gaps[R]

Gaps (number of gaps)

hit_frame[R]

frame of the hit

hit_from[R]

start position of the hit (the first position is 1)

hit_strand[R]

strand of the hit (“Plus” or “Minus” or nil)

hit_to[R]

end position of the hit (inclusive)

hseq[R]

hit sequence (with gaps) of the alignment of the hsp

identity[R]

Identity (number of identical nucleotides or amino acids)

midline[R]

middle line of the alignment of the hsp

percent_gaps[R]

percent of gaps

percent_identity[R]

percent of identical nucleotides or amino acids

percent_positive[R]

percent of positive hit amino acids or nucleotides

positive[R]

Positives (number of positive hit amino acids or nucleotides)

qseq[R]

query sequence (with gaps) of the alignment of the hsp

query_frame[R]

frame of the query

query_from[R]

start position of the query (the first position is 1)

query_strand[R]

strand of the query (“Plus” or “Minus” or nil)

query_to[R]

end position of the query (inclusive)

score[R]

score

stat_method[R]

statistical method for calculating evalue and/or score (nil or a string) (note that composition-based statistics for blastp or tblastn were enabled by default after NCBI BLAST 2.2.17)

Public Class Methods

new(data) click to toggle source

Creates a new HSP object. It is designed to be called only internally from the Bio::Blast::Default::Report::Hit class. Users should not call this method directly.

# File lib/bio/appl/blast/format0.rb, line 949
# Builds an HSP from the front of +data+, consuming the elements it uses.
#
# The first element of +data+ is removed and kept as the raw score text
# (@f0score). Every following element that has a line starting with
# "Query:" or "Sbjct:" is then removed and appended to @f0alignment; the
# first element that does not match (or the end of +data+) stops the
# collection and is left in +data+.
def initialize(data)
  @f0score = data.shift
  @f0alignment = []
  loop do
    head = data[0]
    break unless head && /^(Query|Sbjct)\:/ =~ head
    @f0alignment << data.shift
  end
end

Private Class Methods

method_after_parse_alignment(*names) click to toggle source

Defines attributes which call #parse_alignment before accessing.

# File lib/bio/appl/blast/format0.rb, line 1180
# Defines, for each given name, a reader method that first calls
# parse_alignment and then returns the instance variable of the same name.
def self.method_after_parse_alignment(*names)
  names.each { |attr| module_eval("def #{attr}; parse_alignment; @#{attr}; end") }
end
method_after_parse_score(*names) click to toggle source

Defines attributes which call #parse_score before accessing.

# File lib/bio/appl/blast/format0.rb, line 1044
# Defines, for each given name, a reader method that first calls
# parse_score and then returns the instance variable of the same name.
def self.method_after_parse_score(*names)
  names.each { |attr| module_eval("def #{attr}; parse_score; @#{attr}; end") }
end

Private Instance Methods

parse_alignment() click to toggle source

Parses alignments.

# File lib/bio/appl/blast/format0.rb, line 1110
# Parses the raw alignment text kept in @f0alignment. The work is done at
# most once: later calls return immediately when @parse_alignment is set.
#
# Each element of @f0alignment is scanned line by line for:
# * a "Query:" line  - start position, sequence chunk, end position;
# * a midline        - a line beginning with at least six spaces;
# * a "Sbjct:" line  - start position, sequence chunk, end position;
# * a "pattern" line - PHI-BLAST output, skipped.
#
# The column and width of the sequence chunk are taken from the "Query:"
# line and reused to slice the midline and "Sbjct:" lines. When a "Sbjct:"
# line directly follows a "Query:" line (no midline), a blank midline chunk
# of the same width is recorded.
#
# On completion sets @qseq, @hseq, @midline, @query_from, @query_to,
# @hit_from and @hit_to.
#
# Raises ScanError on unrecognized text, on a "Query:" line or midline that
# arrives out of order, or on a "Sbjct:" line seen before any "Query:" line.
def parse_alignment
  return if defined?(@parse_alignment)

  query_first = nil
  query_last = nil
  hit_first = nil
  hit_last = nil
  query_chunks = []
  hit_chunks = []
  mid_chunks = []
  seq_column = nil   # offset of the sequence within a line
  seq_width = 0      # length of the sequence chunk on the current row
  expecting = :q     # which kind of line should come next (:q, :m or :s)

  @f0alignment.each do |chunk|
    scanner = StringScanner.new(chunk)
    while scanner.rest?
      if (prefix_len = scanner.skip(/Query\: *(\d+) */))
        seq_column = prefix_len
        from = scanner[1]
        seq_width = scanner.skip(/[^ ]*/)
        residues = scanner[0]
        scanner.skip(/ *(\d+) *\n/)
        to = scanner[1]
        raise ScanError unless expecting == :q
        query_first ||= from.to_i
        query_last = to.to_i
        query_chunks << residues
        expecting = :m
      elsif (line = scanner.scan(/Sbjct\: *(\d+) *.+ +(\d+) *\n/))
        from = scanner[1]
        to = scanner[2]
        raise ScanError unless seq_column && seq_width
        residues = line[seq_column, seq_width]
        # No midline was seen between Query and Sbjct: record a blank one.
        mid_chunks << (' ' * seq_width) if expecting == :m
        hit_first ||= from.to_i
        hit_last = to.to_i
        hit_chunks << residues
        expecting = :q
      elsif (line = scanner.scan(/ {6}.+/))
        raise ScanError unless expecting == :m
        mid_chunks << line[seq_column, seq_width]
        scanner.skip(/\n/)
        expecting = :s
      elsif scanner.skip(/pattern +\d+.+/)
        # PHI-BLAST pattern line: nothing to record.
        scanner.skip(/\n/)
      else
        raise ScanError
      end
    end
  end

  @qseq = query_chunks.join('')
  @hseq = hit_chunks.join('')
  @midline = mid_chunks.join('')
  @query_from = query_first
  @query_to = query_last
  @hit_from = hit_first
  @hit_to = hit_last
  @parse_alignment = true
end
parse_score() click to toggle source

Parses scores, identities, positives, gaps, and so on.

# File lib/bio/appl/blast/format0.rb, line 958
# Parses the raw score text kept in @f0score. The work is done at most
# once: later calls return immediately when @parse_score is set.
#
# Recognized items (optionally separated by whitespace and a comma):
# * "Expect = ..."                     -> @evalue
# * "Score = B bits (S)"               -> @bit_score, @score
# * "Identities|Positives|Gaps = n/len (p%)"
#                                      -> @identity / @positive / @gaps,
#                                         the matching @percent_* value
#                                         (stored via to_i), and @align_len
# * "Strand = Plus|Minus / Plus|Minus" -> @query_strand, @hit_strand,
#                                         @query_frame, @hit_frame
# * "Frame = +n" or "Frame = +n / -m"  -> @query_frame (and @hit_frame)
# * "Score = S (B bits)" (WU-BLAST)    -> @score, @bit_score
# * "P = ..." (WU-BLAST)               -> @pvalue, @p_sum_n = nil
# * "Sum P(n) = ..." (WU-BLAST)        -> @p_sum_n, @pvalue
# * "Method: ..."                      -> @stat_method
#
# Raises ScanError on unrecognized text, or when the alignment length
# reported by one item disagrees with the length already recorded.
def parse_score
  return if defined?(@parse_score)

  # Numeric tokens may be written with a bare exponent (e.g. "e-100");
  # a leading '1' is added before converting such tokens to Float.
  to_float = lambda do |token|
    token = '1' + token if token[0] == ?e
    token.to_f
  end

  scanner = StringScanner.new(@f0score)
  while scanner.rest?
    scanner.skip(/\s*/)
    if scanner.skip(/Expect(?:\(\d+\))? *\= *([e\+\-\.\d]+)/)
      @evalue = to_float.call(scanner[1].to_s)
    elsif scanner.skip(/Score *\= *([e\+\-\.\d]+) *bits *\( *([e\+\-\.\d]+) *\)/)
      @bit_score = to_float.call(scanner[1])
      @score = scanner[2].to_i
    elsif scanner.skip(/(Identities|Positives|Gaps) *\= (\d+) *\/ *(\d+) *\(([\.\d]+) *\% *\)/)
      total = scanner[3].to_i
      @align_len = total unless defined?(@align_len)
      raise ScanError unless total == @align_len
      count = scanner[2].to_i
      percent = scanner[4].to_i
      case scanner[1]
      when 'Identities'
        @identity, @percent_identity = count, percent
      when 'Positives'
        @positive, @percent_positive = count, percent
      when 'Gaps'
        @gaps, @percent_gaps = count, percent
      else
        raise ScanError
      end
    elsif scanner.skip(/Strand *\= *(Plus|Minus) *\/ *(Plus|Minus)/)
      query_dir = scanner[1]
      hit_dir = scanner[2]
      @query_strand = query_dir
      @hit_strand = hit_dir
      @query_frame, @hit_frame =
        if query_dir == hit_dir
          [1, 1]
        elsif query_dir == 'Plus' # Plus/Minus
          # complement sequence against xml(-m 7)
          # In xml(-m 8), -1=>Plus, 1=>Minus ???
          # (the alternative would be query_frame = -1, hit_frame = 1)
          [1, -1]
        else # Minus/Plus
          [-1, 1]
        end
    elsif scanner.skip(/Frame *\= *([\-\+]\d+)( *\/ *([\-\+]\d+))?/)
      @query_frame = scanner[1].to_i
      @hit_frame = scanner[3].to_i if scanner[2]
    elsif scanner.skip(/Score *\= *([e\+\-\.\d]+) +\(([e\+\-\.\d]+) *bits *\)/)
      # WU-BLAST
      @score = scanner[1].to_i
      @bit_score = to_float.call(scanner[2])
    elsif scanner.skip(/P *\= * ([e\+\-\.\d]+)/)
      # WU-BLAST
      @p_sum_n = nil
      @pvalue = to_float.call(scanner[1])
    elsif scanner.skip(/Sum +P *\( *(\d+) *\) *\= *([e\+\-\.\d]+)/)
      # WU-BLAST
      @p_sum_n = scanner[1].to_i
      @pvalue = to_float.call(scanner[2])
    elsif scanner.skip(/Method\:\s*(.+)/)
      # signature of composition-based statistics method
      # for example, "Method: Composition-based stats."
      @stat_method = scanner[1]
    else
      raise ScanError
    end
    scanner.skip(/\s*\,?\s*/)
  end
  @parse_score = true
end