Parse 'keggtab' KEGG database definition file which also includes Taxonomic category of the KEGG organisms.
The 'keggtab' file is included in
File format is something like
    # KEGGTAB
    #
    # name          type      directory                   abbreviation
    #
    enzyme          enzyme    $BIOROOT/db/ideas/ligand    ec
    ec              alias     enzyme
    (snip)
    # Human
    h.sapiens       genes     $BIOROOT/db/kegg/genes      hsa
    H.sapiens       alias     h.sapiens
    hsa             alias     h.sapiens
    (snip)
    #
    # Taxonomy
    #
    (snip)
    animals         alias     hsa+mmu+rno+dre+dme+cel
    eukaryotes      alias     animals+plants+protists+fungi
    genes           alias     eubacteria+archaea+eukaryotes
Takes the path to the keggtab file and, optionally, the bioroot top directory. The environment variable BIOROOT, when set, overrides the bioroot argument.
# File lib/bio/db/kegg/keggtab.rb, line 54
# Reads the keggtab file at +file_path+ and hands its whole content to
# parse_keggtab.
#
# file_path:: path of the keggtab file to open
# bioroot::   optional top directory; only used when ENV['BIOROOT'] is unset
def initialize(file_path, bioroot = nil)
  # The environment variable takes precedence over the argument.
  @bioroot  = ENV['BIOROOT'] || bioroot
  @db_names = {}
  @database = {}
  @taxonomy = {}
  File.open(file_path) { |io| parse_keggtab(io.read) }
end
deprecated
# File lib/bio/db/kegg/keggtab.rb, line 141
# Deprecated. Looks +db_name+ up in @db_names and returns that entry's
# aliases; returns nil when there is no such entry.
def alias_list(db_name)
  entry = @db_names[db_name]
  return nil unless entry

  entry.aliases
end
Returns an Array containing all alias names for the database. (e.g. 'hsa' -> ["H.sapiens", "hsa"], 'hpj' -> ["H.pylori_J99", "hpj"])
# File lib/bio/db/kegg/keggtab.rb, line 112
# Returns the aliases of the @database entry stored under +db_abbrev+,
# or nil when no entry is stored under that key.
def aliases(db_abbrev)
  db = @database[db_abbrev]
  db ? db.aliases : nil
end
# File lib/bio/db/kegg/keggtab.rb, line 196
# Returns whatever @taxonomy stores under +node+ (default 'genes'),
# or nil when the label is not present.
def child_nodes(node = 'genes')
  @taxonomy[node]
end
Returns a hash containing the DB definition section of the keggtab file. If a database abbreviation is given as an argument, returns the corresponding Keggtab::DB object.
# File lib/bio/db/kegg/keggtab.rb, line 102
# Without an argument returns the whole @database hash; with +db_abbrev+
# returns only the entry stored under that key (nil if absent).
def database(db_abbrev = nil)
  return @database unless db_abbrev

  @database[db_abbrev]
end
deprecated
# File lib/bio/db/kegg/keggtab.rb, line 157
# Deprecated. Scans the values of @db_names and returns the first one
# whose +abbrev+ equals +db_abbrev+, or nil when none matches.
def db_by_abbrev(db_abbrev)
  @db_names.each_value.find { |db| db.abbrev == db_abbrev }
end
deprecated
# File lib/bio/db/kegg/keggtab.rb, line 148
# Deprecated. Builds "<directory>/<db_name>" from the +path+ of the
# @db_names entry stored under +db_name+. When @bioroot is set, the first
# "$BIOROOT" in the directory is replaced by it.
#
# Raises NoMethodError when +db_name+ is not a key of @db_names.
def db_path(db_name)
  dir = @db_names[db_name].path
  # Block form of sub: the replacement is inserted verbatim. With a string
  # replacement, backslash sequences in @bioroot (e.g. "\\" or "\1" in a
  # Windows/UNC path) would be interpreted as escapes and corrupt the path.
  dir = dir.sub(/\$BIOROOT/) { @bioroot } if @bioroot
  "#{dir}/#{db_name}"
end
deprecated
# File lib/bio/db/kegg/keggtab.rb, line 170
# Deprecated. Resolves +db_abbrev+ to a database name via name_by_abbrev
# and returns db_path for that name.
def db_path_by_abbrev(db_abbrev)
  db_path(name_by_abbrev(db_abbrev))
end
Returns an array of the taxonomy names the organism belongs to, from the nearest label outward. (e.g. 'eco' -> ['proteogamma','proteobacteria','eubacteria','genes']) This method has the aliases keggorg2taxo, korg2taxonomy, and keggorg2taxonomy.
# File lib/bio/db/kegg/keggtab.rb, line 225
# Returns the chain of taxonomy labels above +keggorg+, nearest first.
#
# At each step the first @taxonomy entry (in hash iteration order) whose
# member list includes the current node is taken as its parent. The walk
# stops when no entry contains the current node. Returns an empty array
# when +keggorg+ is not a member of any entry.
#
# The walk is iterative and stops if a label would be visited twice, so a
# cyclic or self-referential taxonomy cannot cause unbounded recursion.
def korg2taxo(keggorg)
  lineage = []
  node = keggorg
  loop do
    parent, = @taxonomy.find { |_label, members| members.include?(node) }
    # No parent: top of the tree. Already seen: the taxonomy has a cycle.
    break if parent.nil? || lineage.include?(parent)

    lineage.push(parent)
    node = parent
  end
  lineage
end
Returns the canonical database name for the abbreviation. (e.g. 'ec' -> 'enzyme', 'hsa' -> 'h.sapiens', ...)
# File lib/bio/db/kegg/keggtab.rb, line 120
# Returns the +name+ of the @database entry stored under +db_abbrev+,
# or nil when no entry is stored under that key.
def name(db_abbrev)
  db = @database[db_abbrev]
  db.name if db
end
deprecated
# File lib/bio/db/kegg/keggtab.rb, line 165
# Deprecated. Returns the +name+ of the object db_by_abbrev yields for
# +db_abbrev+. No nil check is performed on that object.
def name_by_abbrev(db_abbrev)
  db = db_by_abbrev(db_abbrev)
  db.name
end
Returns an absolute path for the flat file database. (e.g. '/bio/db/kegg/genes', ...)
# File lib/bio/db/kegg/keggtab.rb, line 128
# Builds "<directory>/<name>" from the +path+ and +name+ of the @database
# entry stored under +db_abbrev+. When @bioroot is set, the first
# "$BIOROOT" in the directory is replaced by it.
#
# Returns nil when no entry is stored under +db_abbrev+.
def path(db_abbrev)
  db = @database[db_abbrev]
  return nil unless db

  dir = db.path
  # Block form of sub: the replacement is inserted verbatim. With a string
  # replacement, backslash sequences in @bioroot (e.g. "\\" or "\1" in a
  # Windows/UNC path) would be interpreted as escapes and corrupt the path.
  dir = dir.sub(/\$BIOROOT/) { @bioroot } if @bioroot
  "#{dir}/#{db.name}"
end
List of all node labels from Taxonomy section. (e.g. ["actinobacteria", "animals", "archaea", "bacillales", ...)
# File lib/bio/db/kegg/keggtab.rb, line 192
# Returns the keys of @taxonomy as a new, sorted array.
def taxa_list
  labels = @taxonomy.keys
  labels.sort
end
Returns an array of organism names included in the specified taxon label. (e.g. 'proteobeta' -> ["nme", "nma", "rso"]) This method has taxo2keggorgs, taxon2korgs, and taxon2keggorgs aliases.
# File lib/bio/db/kegg/keggtab.rb, line 203
# Expands a taxonomy label (default 'genes') into the organisms below it.
#
# * A +node+ exactly three characters long is treated as an organism code
#   and returned unchanged (as a String), without consulting @taxonomy.
# * Otherwise each member listed under +node+ in @taxonomy is expanded
#   recursively; the result is an Array with one element per member, so
#   deeper levels appear as nested arrays.
# * A +node+ that is neither three characters long nor a key of @taxonomy
#   yields nil.
def taxo2korgs(node = 'genes')
  return node if node.length == 3

  members = @taxonomy[node]
  return nil unless members

  members.map { |member| taxo2korgs(member) }
end
Returns a hash containing the Taxonomy section of the keggtab file. If an argument is given, returns a list of all child nodes belonging to that label node. (e.g. "eukaryotes" -> ["animals", "plants", "protists", "fungi"], ...)
# File lib/bio/db/kegg/keggtab.rb, line 182
# Without an argument returns the whole @taxonomy hash; with +node+
# returns only what is stored under that label (nil if absent).
def taxonomy(node = nil)
  return @taxonomy unless node

  @taxonomy[node]
end
Generated with the Darkfish Rdoc Generator 2.