module Bio::Sequence::Format::INSDFeatureHelper

Formatting helper methods for INSD (NCBI, EMBL, DDBJ) feature tables.

Constants

MonthStr

Private Instance Methods

fold(str, width) click to toggle source
# File lib/bio/sequence/format.rb, line 301
# Cuts +str+ into consecutive pieces of at most +width+ characters and
# returns them as one string with a newline appended after every piece.
# Pieces never span an existing line break, because "." does not match
# a newline.
def fold(str, width)
  str.gsub(/.{1,#{width}}/) { |piece| piece + "\n" }
end
fold_and_split_lines(str, width) click to toggle source
# File lib/bio/sequence/format.rb, line 305
# Cuts +str+ into consecutive pieces of at most +width+ characters and
# returns the pieces as an Array of Strings (no newlines are added).
def fold_and_split_lines(str, width)
  piece_pattern = /.{1,#{width}}/
  str.scan(piece_pattern)
end
format_date(d) click to toggle source

Formats a date given as a Date, DateTime, or Time object, or as a String.

# File lib/bio/sequence/format.rb, line 359
# Formats +d+ as "DD-MON-YYYY", taking the month label from MonthStr.
#
# +d+ is expected to respond to +year+, +month+ and +day+. When reading
# those fields raises NoMethodError, NameError, ArgumentError or TypeError
# (for example when +d+ is a String), +d+ is returned as text, left-justified
# and padded to 11 characters.
def format_date(d)
  fields =
    begin
      [d.year, d.month, d.day]
    rescue NoMethodError, NameError, ArgumentError, TypeError
      nil
    end
  # Not a date-like object: emit it verbatim in an 11-character column.
  return sprintf("%-11s", d) if fields.nil?

  year, month, day = fields
  sprintf("%02d-%-3s-%04d", day, MonthStr[month], year)
end
format_feature(feature, prefix, indent, width) click to toggle source

format an INSD feature

# File lib/bio/sequence/format.rb, line 265
# Renders one feature as text.
#
# The first line is +prefix+, the feature key left-justified in a
# 16-character column, and the start of the position string. The position
# is wrapped to +width+ characters; continuation lines start with +indent+.
# The formatted qualifiers of the feature follow on their own lines.
#
# Returns the resulting String.
def format_feature(feature, prefix, indent, width)
  key_column = prefix + sprintf("%-16s", feature.feature)
  location = wrap_and_split_lines(feature.position, width).join("\n" + indent)
  qualifier_text = format_qualifiers(feature.qualifiers, indent, width)

  key_column + location + "\n" + qualifier_text
end
format_features(features, prefix, indent, width) click to toggle source

Formats INSD features.

# File lib/bio/sequence/format.rb, line 256
# Formats every feature yielded by +features.each+ with format_feature and
# returns the pieces concatenated, in iteration order, as a single String.
def format_features(features, prefix, indent, width)
  formatted = []
  features.each { |entry| formatted << format_feature(entry, prefix, indent, width) }
  formatted.join('')
end
format_features_embl(features) click to toggle source

INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any case, it would be difficult to successfully call this method outside its expected context).

Output the EMBL feature format string of the sequence. Used in Bio::Sequence::Format#output.


Returns

String object

# File lib/bio/sequence/format.rb, line 247
# Formats +features+ as an EMBL feature table: every feature line starts
# with "FT   ", continuation lines are indented by a further 16 spaces, and
# the text width is whatever remains of an 80-column line after that indent.
def format_features_embl(features)
  line_prefix = 'FT   '
  continuation = line_prefix + (' ' * 16)

  format_features(features, line_prefix, continuation, 80 - continuation.length)
end
format_features_genbank(features) click to toggle source

INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any case, it would be difficult to successfully call this method outside its expected context).

Output the Genbank feature format string of the sequence. Used in Bio::Sequence::Format#output.


Returns

String object

# File lib/bio/sequence/format.rb, line 231
# Formats +features+ as a GenBank feature table: every feature line starts
# with 5 spaces, continuation lines are indented by a further 16 spaces, and
# the text width is whatever remains of a 79-column line after that indent.
def format_features_genbank(features)
  line_prefix = ' ' * 5
  continuation = line_prefix + (' ' * 16)

  format_features(features, line_prefix, continuation, 79 - continuation.length)
end
format_qualifiers(qualifiers, indent, width) click to toggle source

format qualifiers

# File lib/bio/sequence/format.rb, line 278
# Formats feature qualifiers as indented "/name=value" lines.
#
# For each element of +qualifiers+ (objects responding to +qualifier+ and
# +value+):
# * a value of +true+ marks a flag qualifier and is written as "/name"
#   with no value;
# * the "translation" qualifier is written as /translation="..." and
#   hard-folded at +width+ characters;
# * any other value is written as "/name=value". The value is enclosed in
#   double quotes (with embedded double quotes doubled) when it contains a
#   non-digit character or when the qualifier is "chromosome".
#
# Every output line is prefixed with +indent+. The qualifier objects and
# their values are not modified.
#
# Returns all formatted lines concatenated into one String.
def format_qualifiers(qualifiers, indent, width)
  qualifiers.collect do |qualifier|
    q = qualifier.qualifier
    raw_value = qualifier.value

    lines =
      # Compare the raw value: after to_s it is a String and can never
      # equal true, which would make the flag branch unreachable.
      if raw_value == true
        wrap_with_newline('/' + q, width)
      elsif q == 'translation'
        fold("/#{q}=\"#{raw_value}\"", width)
      else
        v = raw_value.to_s
        if v[/\D/] || q == 'chromosome'
          # Non-destructive gsub: String#to_s returns the receiver itself,
          # so an in-place edit would corrupt the qualifier's stored value.
          v = '"' + v.gsub(/"/, '""') + '"'
        end
        wrap_with_newline('/' + q + '=' + v, width)
      end

    lines.gsub(/^/, indent)
  end.join
end
null_date() click to toggle source

null date

# File lib/bio/sequence/format.rb, line 371
# Returns the placeholder ("null") date: a Date for January 1 of year 0.
def null_date
  Date.civil(0, 1, 1)
end
wrap(str, width = 80, prefix = '') click to toggle source
# File lib/bio/sequence/format.rb, line 342
# Wraps +str+ so that every line, including +prefix+, fits in +width+
# columns, and puts +prefix+ at the start of each line. Lines are joined
# with "\n" and there is no trailing newline. Returns an empty String when
# wrapping produces no text.
def wrap(str, width = 80, prefix = '')
  wrapped = wrap_and_split_lines(str, width - prefix.length).join("\n#{prefix}")
  wrapped.empty? ? wrapped : prefix + wrapped
end
wrap_and_split_lines(str, width) click to toggle source
# File lib/bio/sequence/format.rb, line 309
# Splits +str+ into an Array of lines, each at most +width+ characters long.
#
# +str+ is first split at existing line breaks (\r\n, \r or \n) and each
# piece has trailing whitespace removed. A piece longer than +width+ is then
# broken repeatedly, preferring the right-most break point within the first
# +width+ characters that is either a space or immediately follows a comma
# or semicolon. Spaces around a break point are dropped. When no such break
# point exists, the piece is cut after exactly +width+ characters.
#
# A +width+ smaller than 1 is treated as 1, because a non-positive width
# can never be satisfied and would otherwise keep the loop running forever.
#
# Returns the Array of line Strings (empty for an empty +str+).
def wrap_and_split_lines(str, width)
  width = 1 if width < 1
  result = []
  str.chomp.split(/(?:\r\n|\r|\n)/).each do |left|
    left.rstrip!
    while left && left.length > width
      line = nil
      # Scan right-to-left for the last acceptable break point.
      width.downto(1) do |i|
        next unless left[i..i] == ' ' || /[\,\;]/ =~ left[(i - 1)..(i - 1)]

        line = left[0..(i - 1)].sub(/ +\z/, '')
        left = left[i..-1].sub(/\A +/, '')
        break
      end
      if line.nil?
        # No break point found: cut at the width limit.
        line = left[0..(width - 1)]
        left = left[width..-1]
      end
      result << line
      # Nothing left after the break: do not emit a trailing empty line.
      left = nil if left.to_s.empty?
    end
    result << left if left
  end
  result
end
wrap_with_newline(str, width) click to toggle source
# File lib/bio/sequence/format.rb, line 335
# Wraps +str+ to +width+ columns and returns the lines as one String in
# which every line, including the last, ends with "\n". Returns an empty
# String when wrapping produces no text.
def wrap_with_newline(str, width)
  text = wrap_and_split_lines(str, width).join("\n")
  text.empty? ? text : text << "\n"
end