module Bio::Blast::Remote::Genomenet

Description

The Bio::Blast::Remote::GenomeNet module contains methods for running remote BLAST searches on GenomeNet (blast.genome.jp/).

Usage

require 'bio'

# To run an actual BLAST analysis:
#   1. create a BLAST factory; the last argument ('genomenet') selects
#      GenomeNet as the remote server
blast_factory = Bio::Blast.remote('blastp', 'nr-aa',
                                  '-e 0.0001', 'genomenet')
# or, as an alternative way to create the factory:
blast_factory = Bio::Blast::Remote.genomenet('blastp', 'nr-aa',
                                             '-e 0.0001')

#   2. run the actual BLAST by querying the factory
#      (sequence_text is not defined in this example; presumably it holds
#      the query sequence as text)
report = blast_factory.query(sequence_text)

# Then, to parse the report, see Bio::Blast::Report

Available databases for Bio::Blast::Remote::GenomeNet

An up-to-date list of available databases can be obtained with Bio::Blast::Remote::GenomeNet.databases(program). Short descriptions of the databases:

----------+-------+---------------------------------------------------
 program  | query | db (supported in GenomeNet)
----------+-------+---------------------------------------------------
 blastp   | AA    | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
----------+-------+ pir, prf, pdbstr
 blastx   | NA    | 
----------+-------+---------------------------------------------------
 blastn   | NA    | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
----------+-------+ htgs, dbsts, embl-nonst, embnonst-upd, epd,
 tblastn  | AA    | genes-nt, genome, vgenes.nuc
----------+-------+---------------------------------------------------

BLAST options

Options are basically the same as those of the blastall command in NCBI BLAST. See www.genome.jp/tools-bin/show_man?blast2

See also

References

Constants

Host

Public Class Methods

new(program, db, options = []) click to toggle source

Creates a remote BLAST factory using GenomeNet. Returns Bio::Blast object.

Note for future improvement: this method may eventually return a Bio::Blast::Remote::GenomeNet object or some other object instead.

# File lib/bio/appl/blast/genomenet.rb, line 87
# Builds a remote BLAST factory that targets the 'genomenet' server.
# The given program, database and options are passed to Bio::Blast.new
# unchanged, and its return value is returned as is.
def self.new(program, db, options = [])
  server = 'genomenet'
  Bio::Blast.new(program, db, options, server)
end

Private Instance Methods

exec_genomenet(query) click to toggle source

Executes BLAST on GenomeNet and returns the result as a string.

# File lib/bio/appl/blast/genomenet.rb, line 160
# Executes BLAST on the GenomeNet server and returns the result as a string.
#
# The options returned by make_command_line_options are split up: -p, -d,
# -M, -F, -v and -b are sent as dedicated form fields, and everything else
# (including -m, which defaults to '7' here) is sent in 'other_param'.
#
# A database name such as "mine-aa eco bsu hsa" is split into the database
# name ("mine-aa") and a "myspecies-aa" form field holding the rest.
#
# When -m is 0, HTML tags are stripped from the result and the &gt; / &lt;
# entities are decoded.
#
# ---
# *Arguments*:
# * (required) _query_: query text, posted as the 'sequence' form field
# *Returns*:: String (the same value is also stored in @output)
# *Raises*:: RuntimeError ('cannot understand response') when the response
#            has no "Show all result" link, or when the linked non-.txt
#            page has no <pre> section
def exec_genomenet(query)
  host = Host
  #host = "blast.genome.jp"
  #path = "/sit-bin/nph-blast"
  #path = "/sit-bin/blast" #2005.08.12
  path = "/tools-bin/blast" #2012.01.12

  options = make_command_line_options
  opt = Bio::Blast::NCBIOptions.new(options)

  program = opt.delete('-p')
  db = opt.delete('-d')

  # When database name starts with mine-aa or mine-nt,
  # space-separated list of KEGG organism codes can be given.
  # For example, "mine-aa eco bsu hsa".
  myspecies = nil
  if /\A(mine-(aa|nt))\s+/ =~ db.to_s then
    db = $1
    myspecies = { "myspecies-#{$2}" => $' }
  end

  matrix = opt.delete('-M') || 'blosum62'
  filter = opt.delete('-F') || 'T'

  opt_v = opt.delete('-v') || 500 # default value for GenomeNet
  opt_b = opt.delete('-b') || 250 # default value for GenomeNet

  # format, not for form parameters, but included in option string
  opt_m = opt.get('-m') || '7' # default of BioRuby GenomeNet factory
  opt.set('-m', opt_m)

  optstr = Bio::Command.make_command_line_unix(opt.options)

  form = {
    'style'          => 'raw',
    'prog'           => program,
    'dbname'         => db,
    'sequence'       => query,
    'other_param'    => optstr,
    'matrix'         => matrix,
    'filter'         => filter,
    'V_value'        => opt_v,
    'B_value'        => opt_b,
    'alignment_view' => 0,
  }

  form.merge!(myspecies) if myspecies

  # drop parameters without a value (nil or false) so they are not posted
  form.delete_if { |_key, value| !value }

  http = Bio::Command.new_http(host)
  http.open_timeout = 300
  http.read_timeout = 600
  result = Bio::Command.http_post_form(http, path, form)
  @output = result.body

  # workaround 2008.8.13
  if result.code == '302' then
    newuri = URI.parse(result['location'])
    newpath = newuri.path
    result = http.get(newpath)
    @output = result.body
    # waiting for BLAST finished: the page is polled as long as it still
    # shows the "job accepted" placeholder text
    while /Your job ID is/ =~ @output &&
        /Your result will be displayed here\.?\<br\>/i =~ @output
      if /This page will be reloaded automatically in\s*((\d+)\s*min\.)?\s*((\d+)\s*sec\.)?/ =~ @output then
        # follow the interval announced by the page, limited to 1..300 sec
        reloadtime = $2.to_i * 60 + $4.to_i
        reloadtime = 300 if reloadtime > 300
        reloadtime = 1 if reloadtime < 1
      else
        reloadtime = 5
      end
      if $VERBOSE then
        $stderr.puts "waiting #{reloadtime} sec to reload #{newuri.to_s}"
      end
      sleep(reloadtime)
      result = http.get(newpath)
      @output = result.body
    end
  end

  # workaround 2005.08.12 + 2011.01.27 + 2011.7.22
  unless /\<A +HREF=\"(http\:\/\/[\-\.a-z0-9]+\.genome\.jp)?(\/tmp\/[^\"]+)\"\>Show all result\<\/A\>/i =~ @output.to_s
    raise 'cannot understand response'
  end
  all_prefix = $1
  all_path = $2
  # a link without scheme/host is relative to the GenomeNet host
  all_prefix = "http://#{Host}" if all_prefix.to_s.empty?
  all_uri = all_prefix + all_path
  @output = Bio::Command.read_uri(all_uri)

  # *.txt results are used untouched; anything else is treated as an HTML
  # page whose result is inside the first <pre> section
  unless /\.txt\z/ =~ all_path
    txt = @output.to_s.split(/\<pre\>/)[1]
    raise 'cannot understand response' unless txt
    txt = txt.sub(/\<\/pre\>.*\z/m, '')
    txt = txt.sub(/.*^ \-{20,}\s*/m, '')
    @output = txt
  end

  # for -m 0 (NCBI BLAST default) output, html tags are removed.
  if opt_m.to_i == 0 then
    # Non-destructive sub/gsub are used on purpose: the bang variants
    # return nil when nothing was replaced, which made this section fail
    # with NoMethodError whenever the result had no "<select " line.
    txt = @output.to_s.sub(/^\<select .*/, '')
    #txt.gsub!(/^\s*\<img +src\=\"\/Fig\/arrow\_top\.gif\"\>.+$\r?\n/, '')
    txt = txt.gsub(/^.+\<\/form\>$/, '')
    #txt.gsub!(/^\<form *method\=\"POST\" name\=\"clust\_check\"\>.+$\r?\n/, '')
    # remove the whole "Top" link including its text; the href value may
    # be of any length and is terminated by a closing double quote
    txt = txt.gsub(/\<a href\=\"\/tmp[^\"]*\"\>\&uarr\;\&nbsp\;Top\<\/a\>/, '')
    txt = txt.gsub(/\<[^\>\<]+\>/m, '')
    txt = txt.gsub(/\&gt\;/, '>')
    txt = txt.gsub(/\&lt\;/, '<')
    @output = txt
  end

  return @output
end