class Robots

Constants

DEFAULT_TIMEOUT

Public Class Methods

get_robots_txt(uri, user_agent)
# File lib/robots.rb, line 101
# Requests the "/robots.txt" resource of the site that +uri+ points at.
#
# uri        - anything whose +to_s+ yields the base address; it is joined
#              with "/robots.txt" via URI.join.
# user_agent - value sent in the "User-Agent" request header.
#
# The request is bounded by Robots.timeout. Returns whatever +open+ returns
# on the joined URI, or nil when building the URI or opening it raises a
# StandardError. On timeout a message is written to STDERR and nil is
# returned.
#
# NOTE(review): +open+ on a URI is presumably supplied by open-uri, which is
# not required in the visible source -- confirm it is loaded elsewhere.
def self.get_robots_txt(uri, user_agent)
  Timeout.timeout(Robots.timeout) do
    begin
      robots_location = URI.join(uri.to_s, "/robots.txt")
      robots_location.open("User-Agent" => user_agent)
    rescue StandardError
      # Best effort: any failure to build or fetch the URL counts as "no robots.txt".
      nil
    end
  end
rescue Timeout::Error
  STDERR.puts "robots.txt request timed out"
end
new(user_agent)
# File lib/robots.rb, line 119
# Sets up a checker for a single user agent.
#
# user_agent - stored as-is in @user_agent; the instance methods pass it on
#              to ParsedRobots.
#
# @parsed starts out as an empty Hash; the instance methods fill it with one
# ParsedRobots entry per host.
def initialize(user_agent)
  @parsed = {}
  @user_agent = user_agent
end
timeout()
# File lib/robots.rb, line 115
# Current timeout setting.
#
# Returns the value assigned through the writer when it is truthy; otherwise
# (never set, or set to nil/false) falls back to DEFAULT_TIMEOUT.
def self.timeout
  return @timeout if @timeout

  DEFAULT_TIMEOUT
end
timeout=(t)
# File lib/robots.rb, line 111
# Overrides the timeout setting.
#
# t - new value, stored unchanged in @timeout on the receiver; assigning
#     nil or false makes the reader fall back to DEFAULT_TIMEOUT again.
def self.timeout=(t)
  @timeout = t
end

Public Instance Methods

allowed?(uri)
# File lib/robots.rb, line 124
# Asks the ParsedRobots entry for the URI's host whether +uri+ is allowed.
#
# uri - a URI object, or any object whose +to_s+ can be parsed by URI.parse.
#
# One ParsedRobots instance is built per host (keyed by +uri.host+ only) and
# reused on later calls. Returns whatever ParsedRobots#allowed? returns for
# the URI and the stored user agent.
def allowed?(uri)
  target = uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  # ||= yields the cached entry, or builds and stores one on first sight of this host.
  rules = (@parsed[target.host] ||= ParsedRobots.new(target, @user_agent))
  rules.allowed?(target, @user_agent)
end
other_values(uri)
# File lib/robots.rb, line 131
# Returns ParsedRobots#other_values for the host that +uri+ belongs to.
#
# uri - a URI object, or any object whose +to_s+ can be parsed by URI.parse.
#
# The ParsedRobots instance is looked up in the per-host cache (keyed by
# +uri.host+) and created with the stored user agent when missing.
def other_values(uri)
  location = uri.is_a?(URI) ? uri : URI.parse(uri.to_s)
  site = location.host
  # Reuse the cached entry if truthy, otherwise build one and remember it.
  robots = @parsed[site] || (@parsed[site] = ParsedRobots.new(location, @user_agent))
  robots.other_values
end