class Bio::PSORT::PSORT2::Report
Bio::PSORT::PSORT2::Report
Report parser class for PSORT II (PSORT2).
Example
Constants
Attributes
Definition of query sequence.
#entry_id of query sequence.
Feature vector used for the kNN prediction.
k parameter of k-nearest neighbors classifier.
Predicted subcellular localization (three-letter code).
Probability vector of kNN prediction.
Raw text of output report.
Given subcellular localization (three-letter code).
Sequence of query sequence.
Public Class Methods
Parser for the default report format ("psort report" output).
# File lib/bio/appl/psort/report.rb, line 273
#
# Parses a report in the default ("psort report") output format.
#
# The text is split on blank lines and the resulting sections are used in
# order: header line, feature matrix, kNN probabilities, prediction line.
#
# ent::      raw report text
# entry_id:: optional identifier handed to the constructor
#
# Returns the populated report object.
# Raises ArgumentError when fewer than four sections are present.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)

  sections = ent.split(/\n\n/).map { |section| section.chomp }
  raise ArgumentError, "invalid format\n[#{ent}]" if sections.size < 4

  report.set_header_line(sections[0])

  # Feature matrix: pick out every "name: value" pair directly. Scanning
  # (instead of splitting on a separator and then on ": ") keeps each name
  # together with its value whatever amount of whitespace or line breaks
  # lies between the pairs.
  sections[1].scan(/(\S+):\s+(\S+)/) do |name, value|
    report.features[name] = value.to_f
  end

  report.prob = self.set_kNN_prob(sections[2])
  report.set_prediction(sections[3])

  report
end
Divides entry body
# File lib/bio/appl/psort/report.rb, line 392
#
# Divides the entry body at the BOUNDARY marker.
#
# entry:: collection that responds to +index+ and range slicing
#
# Returns two values: everything before the boundary, and everything from
# two positions after the boundary (the boundary itself and the element
# following it are skipped) to the end.
# Raises ArgumentError when BOUNDARY is not found in +entry+.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  raise ArgumentError, 'invalid format: boundary not found' unless boundary

  # An exclusive range keeps the first part empty when the boundary sits at
  # position 0; an inclusive 0..(boundary - 1) would become 0..-1 there and
  # return the whole entry.
  return entry[0...boundary], entry[(boundary + 2)..(entry.length)]
end
Constructs a Bio::PSORT::PSORT2::Report object.
# File lib/bio/appl/psort/report.rb, line 227
#
# Builds a report object; every argument is stored unchanged in the
# instance variable of the same name.
#
# raw::        raw report text (default: empty string)
# entry_id::   identifier of the query sequence
# scl::        given subcellular localization
# definition:: definition of the query sequence
# seq::        query sequence
# k::          k parameter of the kNN classifier
# features::   feature table (default: a new empty Hash per call)
# prob::       probability table (default: a new empty Hash per call)
# pred::       predicted subcellular localization
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  @raw        = raw
  @entry_id   = entry_id
  @scl        = scl
  @definition = definition
  @seq        = seq
  @k          = k
  @features   = features
  @prob       = prob
  @pred       = pred
end
Parses output report with output format detection automatically.
# File lib/bio/appl/psort/report.rb, line 242
#
# Parses an output report, detecting the output format from its content
# and dispatching to the matching parser.
#
# str::      raw report text
# entry_id:: identifier passed through to the selected parser
#
# Returns the object produced by the selected parser; for text that only
# matches "PSORT II server" a report holding the raw text is returned.
# Raises ArgumentError when no known format is recognised.
def self.parser(str, entry_id)
  case str
  when /^ psg:/   # default report
    self.default_parser(str, entry_id)
  when /^PSG:/    # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # The report text is +str+; this branch previously referenced an
    # undefined local and failed with NameError.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end
Returns @prob value.
# File lib/bio/appl/psort/report.rb, line 309
#
# Builds the probability table from the kNN section of a report.
#
# Every key of Bio::PSORT::PSORT2::SclNames starts at 0.0. Each line of
# +str+ is expected to look like "<value> %: <name>"; the name is mapped
# back to its SclNames key and the value is stored as a Float. Lines whose
# name is not a value of SclNames (including blank lines) are ignored, so
# no +nil+ key ends up in the result.
#
# str:: text of the kNN probability section
#
# Returns a Hash mapping SclNames keys to Float values.
def self.set_kNN_prob(str)
  names = Bio::PSORT::PSORT2::SclNames

  prob = {}
  names.keys.each { |code| prob[code] = 0.0 }

  str.gsub(/\t/, '').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the reverse lookup; Hash#index was removed in Ruby 3.0.
    key = names.key(scl)
    next unless key

    prob[key] = val.to_f
  end

  prob
end
Parser for the "too short length" report.
$id: too short length ($leng), skipped\n";
# File lib/bio/appl/psort/report.rb, line 260
#
# Parses a "too short length" report.
#
# ent::      raw report text
# entry_id:: optional identifier; when absent, the text in front of
#            ": too short length" is used instead
#
# Returns a report built from +ent+. When the text contains the
# "too short length" message, its scl is set to '---'.
def self.too_short_parser(ent, entry_id = nil)
  report = self.new(ent)
  report.entry_id = entry_id

  matched = /^(.+)?: too short length/.match(ent)
  return report unless matched

  # Only fall back to the id found in the text when none was supplied.
  report.entry_id ||= matched[1]
  report.scl = '---'
  report
end
Parser for the verbose output report format ("psort -v report" and WWW server output).
# File lib/bio/appl/psort/report.rb, line 338 def self.v_parser(ent, entry_id = nil) report = Bio::PSORT::PSORT2::Report.new(ent, entry_id) ent = ent.split(/\n\n/).map {|e| e.chomp } ent.each_with_index {|e, i| unless /^(\w|-|\>|\t)/ =~ e j = self.__send__(:search_j, i, ent) ent[i - j] += e ent[i] = nil end if /^none/ =~ e # psort output bug j = self.__send__(:search_j, i, ent) ent[i - j] += e ent[i] = nil end } ent.compact! if /^ PSORT II server/ =~ ent[0] # for WWW version ent.shift delline = '' ent.each {|e| delline = e if /^Results of Subprograms/ =~ e } i = ent.index(delline) ent.delete(delline) ent.delete_at(i - 1) end report.set_header_line(ent.shift) report.seq = Bio::Sequence::AA.new(ent.shift) fent, pent = self.divent(ent) report.set_features(fent) report.prob = self.set_kNN_prob(pent[0].strip) report.set_prediction(pent[1].strip) return report end
Private Class Methods
# File lib/bio/appl/psort/report.rb, line 378
#
# Finds how far back from index +i+ the nearest non-nil slot of +ent+ is.
#
# i::   index to search backwards from
# ent:: array that may contain nil slots
#
# Returns the smallest offset x (1..ent.size) for which ent[i - x] is
# truthy, or 1 when no such offset exists.
def self.search_j(i, ent)
  (1..ent.size).find { |offset| ent[i - offset] } || 1
end
Public Instance Methods
Sets @features values.
# File lib/bio/appl/psort/report.rb, line 398
#
# Fills the feature table from an array of feature paragraphs.
#
# Each paragraph is stored whole under the text that precedes its first
# ": " or ":\n" (stripped of surrounding whitespace). Afterwards the 'AA'
# entry is set to the length of the sequence.
#
# features_ary:: array of feature paragraph strings
#
# Returns the sequence length stored under 'AA'.
def set_features(features_ary)
  features_ary.each do |fent|
    label = fent.split(/\:( |\n)/).first.strip
    self.features[label] = fent   # labels starting with '>' are stored too
  end
  self.features['AA'] = self.seq.length
end
Parses the header line and sets @entry_id, @definition and @scl.
# File lib/bio/appl/psort/report.rb, line 292
#
# Parses the header line and fills @entry_id, @definition and @scl.
#
# A leading line of dashes is ignored. When no entry id is set yet, the
# first tab- or space-separated token (without leading dashes) becomes the
# entry id. The definition is the text following "(<n> aa) " when that
# pattern is present, otherwise the remaining tokens joined by spaces.
# @scl is set to the first word of the definition when that word is a key
# of SclNames.
#
# str:: header line text; it is not modified
#
# Returns the value assigned to @scl, or nil when @scl was not assigned.
def set_header_line(str)
  # Non-destructive sub: the caller's string is left untouched, so frozen
  # strings are accepted as well.
  line = str.sub(/^-+\n/, '')
  tmp = line.split(/\t| /)

  # NOTE(review): the first token is consumed only when no entry id was
  # supplied; otherwise it stays in +tmp+ and becomes part of the fallback
  # definition below. Confirm that this is intended.
  @entry_id = tmp.shift.sub(/^-+/, '').strip unless @entry_id

  rest = tmp.join(' ').chomp
  case rest
  when /\(\d+ aa\) (.+)$/
    @definition = $1
  else
    @definition = rest
  end

  scl = @definition.split(' ')[0]
  @scl = scl if SclNames.key?(scl)
end
Sets @pred and @k values (and @entry_id when it is not set yet) from the prediction line.
# File lib/bio/appl/psort/report.rb, line 323
#
# Reads the prediction line and fills @pred and @k; @entry_id is filled
# too when it has not been set before.
#
# str:: text containing "prediction for <id> is <xxx> (k=<n>)"
#
# Returns the value assigned to @k (the matched digits, as a String).
# Raises ArgumentError when +str+ does not contain a prediction line.
def set_prediction(str)
  case str
  when /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/
    name, code, k_value = Regexp.last_match.captures
    @entry_id = name unless @entry_id
    @pred = code
    @k = k_value
  else
    raise ArgumentError,
          "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end
end