Parent

RubyLexer

Constants

EOF
ESCAPES

Attributes

brace_nest[RW]
lex_state[R]

Additional context surrounding tokens that both the lexer and grammar use.

lex_strterm[RW]
lineno[W]
lpar_beg[RW]
paren_nest[RW]
parser[RW]
space_seen[RW]
src[R]

Stream of data that yylex examines.

string_buffer[RW]
token[RW]

Last token read via yylex.

version[RW]

What version of ruby to parse. 18 and 19 are the only valid values currently supported.

warnings[RW]

What handles warnings

yacc_value[RW]

Value of last token which had a value associated with it.

Public Class Methods

new(v = 18) click to toggle source
# File lib/ruby_lexer.rb, line 247
# Builds a lexer for ruby version +v+ (18 by default).
#
# Installs fresh :cond, :cmdarg and :tern state stacks, zeroes the nesting
# counters, clears the lambda-paren marker, empties the pending comment
# buffer and finally calls #reset.
def initialize v = 18
  self.version = v

  # one StackState per condition the lexer has to track
  [:cond, :cmdarg, :tern].each do |stack_name|
    send "#{stack_name}=", RubyParserStuff::StackState.new(stack_name)
  end

  self.string_nest = 0
  self.paren_nest  = 0
  self.brace_nest  = 0
  self.lpar_beg    = nil

  @comments = []

  reset
end

Public Instance Methods

advance() click to toggle source

How the parser advances to the next token.

@return true if not at end of file (EOF).

# File lib/ruby_lexer.rb, line 98
# Pulls the next token from yylex and remembers it in +token+.
#
# Raises RuntimeError when yylex yields nil/false. Returns true while the
# token is not RubyLexer::EOF, false once end of file has been reached.
def advance
  next_token = yylex
  self.token = next_token

  raise "yylex returned nil" unless next_token

  RubyLexer::EOF != next_token
end
arg_ambiguous() click to toggle source
# File lib/ruby_lexer.rb, line 107
# Emits the "ambiguous first argument" warning through #warning and
# returns whatever #warning returns.
def arg_ambiguous
  message = "Ambiguous first argument. make sure."
  self.warning message
end
comments() click to toggle source
# File lib/ruby_lexer.rb, line 111
# Returns every comment collected so far as one string and empties the
# buffer, so each comment is handed out only once.
def comments
  collected = @comments.join
  @comments.clear
  collected
end
expr_beg_push(val) click to toggle source
# File lib/ruby_lexer.rb, line 117
# Pushes +false+ onto the cond and cmdarg stacks, moves the lexer to
# :expr_beg and records +val+ as the current yacc_value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }

  self.lex_state  = :expr_beg
  self.yacc_value = val
end
fix_arg_lex_state() click to toggle source
# File lib/ruby_lexer.rb, line 124
# Sets lex_state to :expr_arg when the lexer currently sits in
# :expr_fname or :expr_dot, and to :expr_beg in every other state.
def fix_arg_lex_state
  after_name_or_dot = in_lex_state? :expr_fname, :expr_dot
  self.lex_state = after_name_or_dot ? :expr_arg : :expr_beg
end
heredoc(here) click to toggle source
# File lib/ruby_lexer.rb, line 132
# Lexes the body of a here document. +here+ is the four element strterm
# tuple (tag, terminator, STR_FUNC flags, rest of the opener line) in the
# shape heredoc_identifier stores in lex_strterm.
#
# Returns :tSTRING_END when positioned on the terminator line,
# :tSTRING_DVAR / :tSTRING_DBEG at the start of an interpolation, and
# :tSTRING_CONTENT (text in yacc_value) otherwise. Reports an error through
# rb_compile_error when the source runs out before the terminator is seen.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  # with STR_FUNC_INDENT the terminator may be preceded by blanks/tabs
  indent  = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
  expand  = (func & STR_FUNC_EXPAND) != 0
  eos_re  = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if
    src.eos?

  # terminator line reached: the here document is finished
  if src.beginning_of_line? && src.scan(eos_re) then
    # NOTE(review): presumably pushes the remainder of the opener line back
    # onto the scanner so lexing resumes there -- confirm in RPStringScanner
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    # interpolating here document: hand "#$x", "#@x" and "#{" to the parser
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    # accumulate line by line until the terminator line is next
    begin
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        # tokadd_string stopped mid-line; emit what has been collected so far
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if
        src.eos?
    end until src.check(eos_re)
  else
    # non-interpolating here document: copy whole lines verbatim
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if
        src.eos?
    end
  end

  # keep the strterm so the next call can consume the terminator line
  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
heredoc_identifier() click to toggle source
# File lib/ruby_lexer.rb, line 194
# Tries to read a here-document opener at the current scan position
# (yylex calls this right after it has scanned "<<").
#
# Recognises a quoted tag (<<'EOS', <<"EOS", <<`EOS`) or a bare identifier
# tag, each optionally preceded by "-" which turns on STR_FUNC_INDENT.
# On success the rest of the opener line is swallowed, lex_strterm is set
# to [:heredoc, tag, func, rest_of_line] and :tXSTRING_BEG (backtick tag)
# or :tSTRING_BEG is returned with yacc_value set accordingly.
#
# Returns nil when no here-document tag follows. Reports an unterminated
# quoted tag through rb_compile_error.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  # quoted tag: \2 must be the same quote character that opened it
  when src.scan(/(-?)([\'\"\`])(.*?)\2/) then
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  # an opening quote that is never closed
  when src.scan(/-?([\'\"\`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  # bare identifier tag behaves like a double quoted one
  when src.scan(/(-?)(#{IDENT_CHAR_RE}+)/) then
    term = '"'
    func |= STR_DQUOTE
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << src[2]
  else
    return nil
  end

  if src.scan(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = src.matched
    src.extra_lines_added += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
in_lex_state?(*states) click to toggle source
# File lib/ruby_lexer.rb, line 243
# True when the current lex_state equals any of the given +states+.
def in_lex_state?(*states)
  current = lex_state
  states.any? { |state| state == current }
end
int_with_base(base) click to toggle source
# File lib/ruby_lexer.rb, line 262
# Converts the text just matched by +src+ to an Integer in the given
# +base+, stores it in yacc_value and returns :tINTEGER. A doubled
# underscore in the literal is reported through rb_compile_error.
def int_with_base base
  rb_compile_error "Invalid numeric format" if src.matched =~ /__/

  literal = src.matched
  self.yacc_value = literal.to_i(base)
  :tINTEGER
end
is_arg?() click to toggle source
# File lib/ruby_lexer.rb, line 1363
# True while the lexer is in one of the argument states
# (:expr_arg or :expr_cmdarg).
def is_arg?
  arg_states = [:expr_arg, :expr_cmdarg]
  in_lex_state?(*arg_states)
end
is_beg?() click to toggle source
# File lib/ruby_lexer.rb, line 1371
# True while the lexer is in a state where an expression may begin
# (:expr_beg, :expr_value, :expr_mid or :expr_class).
def is_beg?
  beg_states = [:expr_beg, :expr_value, :expr_mid, :expr_class]
  in_lex_state?(*beg_states)
end
is_end?() click to toggle source
# File lib/ruby_lexer.rb, line 1367
# True while the lexer is in one of the end-of-expression states
# (:expr_end, :expr_endarg or :expr_endfn).
def is_end?
  end_states = [:expr_end, :expr_endarg, :expr_endfn]
  in_lex_state?(*end_states)
end
is_label_possible?(command_state) click to toggle source
# File lib/ruby_lexer.rb, line 1381
# A label ("name:") may follow either in :expr_beg when the token does not
# start a command (+command_state+ is falsy), or in any argument state.
def is_label_possible? command_state
  return true if in_lex_state?(:expr_beg) && !command_state

  is_arg?
end
is_space_arg?(c = "x") click to toggle source

TODO define IS_AFTER_OPERATOR() IS_lex_state(EXPR_FNAME | EXPR_DOT)

# File lib/ruby_lexer.rb, line 1377
# Truthy when the lexer is in an argument state, whitespace preceded the
# current token, and +c+ (the upcoming character, "x" by default) is not
# itself whitespace.
def is_space_arg? c = "x"
  is_arg? && space_seen && c !~ /\s/
end
lex_state=(o) click to toggle source
# File lib/ruby_lexer.rb, line 269
# Stores the new lexer state. Only Symbols are accepted; anything else
# raises a RuntimeError and leaves the state untouched.
def lex_state= o
  # warn "wtf lex_state = #{o.inspect} from #{caller.first}"
  if Symbol === o then
    @lex_state = o
  else
    raise "wtf\?"
  end
end
lineno() click to toggle source
# File lib/ruby_lexer.rb, line 276
# Current line number, memoised in @lineno. It is read from +src+ the
# first time it is asked for after @lineno has been cleared.
def lineno
  @lineno = src.lineno unless @lineno
  @lineno
end
parse_number() click to toggle source
Parse a number from the input stream.

@param c The first character of the number. @return An int constant which represents a token.

# File lib/ruby_lexer.rb, line 286
# Lexes a numeric literal at the current position of +src+.
#
# Sets lex_state to :expr_end, stores the numeric value in yacc_value and
# returns :tINTEGER or :tFLOAT. Malformed literals are reported through
# rb_compile_error.
#
# The hex and exponent patterns are case-insensitive so that literals such
# as 0xFF, 0XAB and 1E3 are consumed as a whole instead of falling through
# to the plain-integer branch, which would only take the leading digits.
def parse_number
  self.lex_state = :expr_end

  case
  # a base prefix with no digits after it
  when src.scan(/[+-]?0[xXbBdD]\b/) then
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0[Dd][0-9_]+)/) then
    int_with_base(10)
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0[Bb][01_]+/) then
    int_with_base(2)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/i) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  # plain digits, but not when a dangling exponent marker follows
  when src.scan(/[+-]?[0-9_]+(?![e])/i) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
parse_quote() click to toggle source
# File lib/ruby_lexer.rb, line 318
# Lexes the opener of a percent literal; the leading "%" has already been
# consumed. Handles the long-hand forms (%Q, %q, %W, %w, %x, %r, %s, %I,
# %i followed by a delimiter) and the short-hand form (% followed directly
# by a delimiter, treated like %Q).
#
# Sets lex_strterm to [:strterm, string_type, closing_delim, opening_delim]
# and returns the matching *_BEG token. When the delimiter is not one of
# the paired brackets, the opening delimiter is recorded as "\0".
#
# Errors (two-letter type, unknown type, end of file) are reported through
# rb_compile_error.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  # case-insensitive so the upper case types (Q, W, I) take the long-hand path
  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state  = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            when 'I' then
                              [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'i' then
                              [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
                            end

  rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
parse_string(quote) click to toggle source
# File lib/ruby_lexer.rb, line 371
# Lexes the next piece of a string-like literal described by +quote+, a
# [tag, string_type, terminator, opening_delimiter] strterm tuple.
#
# Returns one of:
#   :tSTRING_END / :tREGEXP_END  the terminator was consumed
#   :tSPACE                      separator inside a word-list literal
#   :tSTRING_DVAR / :tSTRING_DBEG  start of an interpolation
#   :tSTRING_CONTENT             literal text, stored in yacc_value
#
# Reports end of file inside the literal through rb_compile_error.
def parse_string(quote) # 65 lines
  _, string_type, term, open = quote

  space = false # FIX: remove these
  func = string_type
  paren = open
  term_re = @@regexp_cache[term]

  qwords = (func & STR_FUNC_QWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # func is nil after the word-list branch below has cleared quote[1];
  # NOTE(review): presumably the same tuple is passed in again on the next
  # call, which is how this branch gets reached -- confirm against caller
  unless func then # FIX: impossible, prolly needs == 0
    self.lineno = nil
    return :tSTRING_END
  end

  space = true if qwords and src.scan(/\s+/)

  # terminator reached while not nested inside paired delimiters
  if self.string_nest == 0 && src.scan(/#{term_re}/) then
    if qwords then
      # emit one last separator now; the end token follows on the next call
      quote[1] = nil # TODO: make struct
      return :tSPACE
    elsif regexp then
      self.yacc_value = self.regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  if space then
    return :tSPACE
  end

  self.string_buffer = []

  if expand
    case
    when src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  return :tSTRING_CONTENT
end
process_token(command_state) click to toggle source
# File lib/ruby_lexer.rb, line 1385
# Classifies the identifier-like text held in +token+ and returns its token
# type, updating lex_state and yacc_value along the way.
#
# +command_state+ is the command_start flag captured by yylex before this
# token was read.
#
# Possible results: :tGVAR, :tCVAR, :tIVAR, :tFID, :tCONSTANT,
# :tIDENTIFIER, :tLABEL, or a keyword token taken from the keyword table
# (with the :kDO family chosen from the surrounding state).
def process_token(command_state)
  # a trailing "!" or "?" (not followed by "=") belongs to the identifier
  token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)

  result = nil
  last_state = lex_state

  case token
  when /^\$/ then
    self.lex_state, result = :expr_end, :tGVAR
  when /^@@/ then
    self.lex_state, result = :expr_end, :tCVAR
  when /^@/ then
    self.lex_state, result = :expr_end, :tIVAR
  else
    if token =~ /[!?]$/ then
      result = :tFID
    else
      if in_lex_state? :expr_fname then
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        if src.scan(/=(?:(?![~>=])|(?==>))/) then
          result = :tIDENTIFIER
          token << src.matched
        end
      end

      result ||= if token =~ /^[A-Z]/ then
                   :tCONSTANT
                 else
                   :tIDENTIFIER
                 end
    end

    # labels ("name:") are only looked for when not lexing for ruby 1.8
    unless ruby18
      if is_label_possible? command_state then
        colon = src.scan(/:/)

        if colon && src.peek(1) != ":" then
          self.lex_state = :expr_beg
          self.yacc_value = [token, src.lineno]
          return :tLABEL
        end

        # it was "::" after all: give the colon back
        src.unscan if colon
      end
    end

    unless in_lex_state? :expr_dot then
      # See if it is a reserved word.
      keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
                  RubyParserStuff::Keyword.keyword18 token
                else
                  RubyParserStuff::Keyword.keyword19 token
                end

      if keyword then
        state           = lex_state
        self.lex_state  = keyword.state
        self.yacc_value = [token, src.lineno]

        # in :expr_fname the keyword is returned with its plain name as value
        if state == :expr_fname then
          self.yacc_value = keyword.name
          return keyword.id0
        end

        self.command_start = true if lex_state == :expr_beg

        if keyword.id0 == :kDO then
          # "do" directly after a lambda's parameter list
          if lpar_beg && lpar_beg == paren_nest then
            self.lpar_beg = nil
            self.paren_nest -= 1

            return :kDO_LAMBDA
          end

          return :kDO_COND  if cond.is_in_state
          return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
          return :kDO_BLOCK if [:expr_beg, :expr_endarg].include? state
          return :kDO
        end

        return keyword.id0 if [:expr_beg, :expr_value].include? state

        # id0 and id1 differ for some keywords; id1 is returned from here on
        self.lex_state = :expr_beg if keyword.id0 != keyword.id1

        return keyword.id1
      end
    end

    # TODO:
    # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {

    # not a keyword: choose the state that follows a plain identifier
    self.lex_state =
      if is_beg? || is_arg? || in_lex_state?(:expr_dot) then
        if command_state then
          :expr_cmdarg
        else
          :expr_arg
        end
      elsif !ruby18 && in_lex_state?(:expr_fname) then
        :expr_endfn
      else
        :expr_end
      end

  end

  self.yacc_value = token

  # a name the parser env knows as a local variable ends the expression
  if (![:expr_dot, :expr_fname].include?(last_state) &&
      self.parser.env[token.to_sym] == :lvar) then
    self.lex_state = :expr_end
  end

  return result
end
rb_compile_error(msg) click to toggle source
# File lib/ruby_lexer.rb, line 431
# Raises RubyParser::SyntaxError with +msg+ extended by the current line
# number and the inspected remainder of the current source line.
def rb_compile_error msg
  location = ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
  raise RubyParser::SyntaxError, msg + location
end
read_escape() click to toggle source
# File lib/ruby_lexer.rb, line 436
# Reads the part of an escape sequence that follows the backslash (which
# the caller has already consumed) and returns the character it denotes.
#
# Handles the single letter escapes, octal (\NNN) and hex (\xNN) constants,
# and the meta (\M-x) and control (\C-x, \cx) forms, including nested
# combinations. Any other character stands for itself. Malformed sequences
# and end of input are reported through rb_compile_error.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\13"
  when src.scan(/a/) then                   # alarm(bell)
    "\007"
  when src.scan(/e/) then                   # escape
    "\033"
  when src.scan(/b/) then                   # backspace
    "\010"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    (src.matched.to_i(8) & 0xFF).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  # meta applied to another escape: read that escape, then set the high bit
  when src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  # control applied to another escape: read that escape, then mask it
  when src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then
    127.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/^[89]/) then # bad octal or hex... MRI ignores them :(
    src.matched
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
regx_options() click to toggle source
# File lib/ruby_lexer.rb, line 491
# Scans the run of lower case letters following a regexp terminator and
# returns the recognised option letters (i x m o n e s u) as a string.
# Any other letter is reported through rb_compile_error.
def regx_options # 15 lines
  letters = src.scan(/[a-z]+/) ? src.matched.split(//) : []
  good, bad = letters.partition { |s| s =~ /^[ixmonesu]$/ }

  unless bad.empty? then
    plural = bad.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" %
                     [plural, bad.join.inspect])
  end

  good.join
end
reset() click to toggle source
# File lib/ruby_lexer.rb, line 506
# Puts the lexer back into its start-of-input condition: command_start is
# true, and the string terminator, last token, last value, source scanner
# and lex state are all cleared.
def reset
  self.command_start = true

  [:lex_strterm, :token, :yacc_value].each do |attribute|
    send "#{attribute}=", nil
  end

  @src       = nil
  @lex_state = nil
end
ruby18() click to toggle source
# File lib/ruby_lexer.rb, line 516
# True when the attached parser is a Ruby18Parser.
def ruby18
  parser.kind_of? Ruby18Parser
end
ruby19() click to toggle source
# File lib/ruby_lexer.rb, line 520
# True when the attached parser is a Ruby19Parser.
def ruby19
  parser.kind_of? Ruby19Parser
end
src=(src) click to toggle source
# File lib/ruby_lexer.rb, line 524
# Installs new source text to lex by wrapping it in an RPStringScanner.
# Anything that is not a String raises a RuntimeError.
def src= src
  if String === src then
    @src = RPStringScanner.new(src)
  else
    raise "bad src: #{src.inspect}"
  end
end
tokadd_escape(term) click to toggle source
# File lib/ruby_lexer.rb, line 529
# Copies one backslash escape from +src+ into string_buffer verbatim
# (backslash included), so that the escape is kept as written.
#
# A backslash-newline pair is dropped. A meta/control prefix that is
# followed by another backslash is copied and the following escape is
# handled recursively. Malformed escapes are reported through
# rb_compile_error. +term+ is only passed along to the recursive call.
def tokadd_escape term # 20 lines
  if src.scan(/\\\n/) then
    # just ignore
  elsif src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    self.string_buffer << src.matched
  elsif src.scan(/\\([MC]-|c)(?=\\)/) then
    self.string_buffer << src.matched
    self.tokadd_escape term
  elsif src.scan(/\\([MC]-|c)(.)/) then
    self.string_buffer << src.matched
  elsif src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif src.scan(/\\(.)/) then
    self.string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
tokadd_string(func, term, paren) click to toggle source
# File lib/ruby_lexer.rb, line 552
# Appends literal string content from +src+ to string_buffer until
# something the caller has to deal with comes up: the terminator +term+
# (when not nested inside +paren+ / +term+ pairs), the start of an
# interpolation, whitespace inside a word list, or end of input.
#
# +func+ is the STR_FUNC_* bit mask of the literal being lexed. +paren+ is
# the opening delimiter, or nil / "\0" when the literal has no distinct
# opening delimiter.
#
# Returns the last chunk that was handled, or RubyLexer::EOF when the
# source is exhausted. A NUL inside a symbol literal is reported through
# rb_compile_error.
def tokadd_string(func, term, paren) # 105 lines
  qwords = (func & STR_FUNC_QWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = @@regexp_cache[paren]
  term_re  = @@regexp_cache[term]

  until src.eos? do
    c = nil
    handled = true

    case
    when paren_re && src.scan(paren_re) then
      self.string_nest += 1
    when src.scan(term_re) then
      if self.string_nest == 0 then
        # un-nested terminator: leave it for the caller
        src.pos -= 1
        break
      else
        self.string_nest -= 1
      end
    when expand && src.scan(/#(?=[\$\@\{])/) then
      # interpolation start: leave the "#" for the caller
      src.pos -= 1
      break
    when qwords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when qwords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when qwords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        self.tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = self.read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        # keep the backslash unless it escapes one of the delimiters
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end # inner /\\/ case
    else
      handled = false
    end # top case

    unless handled then
      # plain content: take everything up to the next special character
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if qwords then
             if RUBY19 then
               /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
             else
               /[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
             end
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
unescape(s) click to toggle source
# File lib/ruby_lexer.rb, line 659
# Translates the text of an escape sequence (without its leading
# backslash) into the character it stands for.
#
# Entries found in ESCAPES win. Otherwise octal, hex, meta (M-x) and
# control (C-x / cx) forms are decoded, some malformed forms are returned
# unchanged, and the remaining malformed ones are reported through
# rb_compile_error. Under RUBY19 the result is tagged as UTF-8.
def unescape s
  known = ESCAPES[s]

  return known if known

  decoded =
    if /^[0-7]{1,3}/ === s then
      ($&.to_i(8) & 0xFF).chr
    elsif /^x([0-9a-fA-F]{1,2})/ === s then
      $1.to_i(16).chr
    elsif /^M-(.)/ === s then
      ($1[0].ord | 0x80).chr
    elsif /^(C-|c)(.)/ === s then
      ($2[0].ord & 0x9f).chr
    elsif /^[89a-f]/ === s then # bad octal or hex... ignore? that's what MRI does :(
      s
    elsif /^[McCx0-9]/ === s then
      rb_compile_error("Invalid escape character syntax")
    else
      s
    end

  decoded.force_encoding "UTF-8" if RUBY19
  decoded
end
warning(s) click to toggle source
# File lib/ruby_lexer.rb, line 684
# Receives a lexer warning message +s+. Warnings are currently discarded;
# the method always returns nil.
def warning s
  nil # do nothing for now
end
yylex() click to toggle source

Returns the next token. Also sets yy_val if needed.

@return Description of the Returned Value

# File lib/ruby_lexer.rb, line 693
def yylex # 826 lines
  c = ''
  self.space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(/[\ \t\r\f\v]/) then # \s - \n + \v
      self.space_seen = true
      next
    elsif src.check(/[^a-zA-Z]/) then
      if src.scan(/\n|#/) then
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.pos -= 1

          while src.scan(/\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
          end

          return RubyLexer::EOF if src.eos?
        end

        # Replace a string of newlines with a single one
        src.scan(/\n+/)

        next if in_lex_state?(:expr_beg, :expr_value, :expr_class,
                              :expr_fname, :expr_dot)

        if src.scan(/([\ \t\r\f\v]*)\./) then
          self.space_seen = true unless src[1].empty?

          src.pos -= 1
          next unless src.check(/\.\./)
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(/[\]\)\}]/) then
        if src.matched == "}" then
          self.brace_nest -= 1
        else
          self.paren_nest -= 1
        end

        cond.lexpop
        cmdarg.lexpop
        tern.lexpop

        self.lex_state = if src.matched == ")" then
                           :expr_endfn
                         else
                           :expr_endarg
                         end

        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.scan(/\!/) then
        if in_lex_state?(:expr_fname, :expr_dot) then
          self.lex_state = :expr_arg

          if src.scan(/@/) then
            self.yacc_value = "!@"
            return :tUBANG
          end
        else
          self.lex_state = :expr_beg
        end

        if src.scan(/[=~]/) then
          self.yacc_value = "!#{src.matched}"
        else
          self.yacc_value = "!"
        end

        return TOKENS[self.yacc_value]
      elsif src.scan(/\.\.\.?|,|![=~]?/) then
        self.lex_state = :expr_beg
        tok = self.yacc_value = src.matched
        return TOKENS[tok]
      elsif src.check(/\./) then
        if src.scan(/\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(/\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(/\(/) then
        result = if ruby18 then
                   yylex_paren18
                 else
                   yylex_paren19
                 end

        self.paren_nest += 1

        self.expr_beg_push "("

        return result
      elsif src.check(/\=/) then
        if src.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
          self.fix_arg_lex_state
          tok = self.yacc_value = src.matched
          return TOKENS[tok]
        elsif src.scan(/\=begin(?=\s)/) then
          @comments << src.matched

          unless src.scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/) then
            @comments.clear
            rb_compile_error("embedded document meets end of file")
          end

          @comments << src.matched

          next
        else
          raise "you shouldn't be able to get here"
        end
      elsif src.scan(/\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(/\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\00""] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(/\@\@?#{IDENT_CHAR_RE}+/) then
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ /\@\d/

        return process_token(command_state)
      elsif src.scan(/\:\:/) then
        if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif ! is_end? && src.scan(/:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
        # scanning shortcut to symbols
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(/\:/) then
        # ?: / then / when
        if is_end? || src.check(/\s/) then
          self.lex_state = :expr_beg
          # TODO warn_balanced(":", "symbol literal");
          self.yacc_value = ":"
          return :tCOLON
        end

        case
        when src.scan(/\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\00""]
        when src.scan(/\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\00""]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(/[0-9]/) then
        return parse_number
      elsif src.scan(/\[/) then
        self.paren_nest += 1

        result = src.matched

        if in_lex_state? :expr_fname, :expr_dot then
          self.lex_state = :expr_arg
          case
          when src.scan(/\]\=/) then
            self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
            self.yacc_value = "[]="
            return :tASET
          when src.scan(/\]/) then
            self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif is_beg? then
          self.tern.push false
          result = :tLBRACK
        elsif is_arg? && space_seen then
          self.tern.push false
          result = :tLBRACK
        else
          result = :tLBRACK2
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(/\'(\\.|[^\'])*\'/) then
        self.yacc_value = src.matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") # "
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(/\|/) then
        if src.scan(/\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(/\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(/\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(/\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(/\{/) then
        self.brace_nest += 1
        if lpar_beg && lpar_beg == paren_nest then
          self.lpar_beg = nil
          self.paren_nest -= 1

          expr_beg_push "{"

          return :tLAMBEG
        end

        result = if is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
                   :tLCURLY      #  block (primary)
                 elsif in_lex_state?(:expr_endarg) then
                   :tLBRACE_ARG  #  block (expr)
                 else
                   self.tern.push false
                   :tLBRACE      #  hash
                 end

        self.expr_beg_push "{"
        self.command_start = true unless result == :tLBRACE

        return result
      elsif src.scan(/->/) then
        self.lex_state = :expr_endfn
        return :tLAMBDA
      elsif src.scan(/[+-]/) then
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if in_lex_state? :expr_fname, :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(/@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (is_beg? || (is_arg? && space_seen && !src.check(/\s/))) then
          if is_arg? then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(/\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(/\*/) then
        if src.scan(/\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(/\*\*/) then
          result = if is_space_arg? src.check(/./) then
                     warning "`**' interpreted as argument prefix"
                     :tDSTAR
                   elsif is_beg? then
                     :tDSTAR
                   else
                     # TODO: warn_balanced("**", "argument prefix");
                     :tPOW
                   end
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return result
        elsif src.scan(/\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(/\*/) then
          result = if is_space_arg? src.check(/./) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif is_beg? then
                     :tSTAR
                   else
                     # TODO: warn_balanced("*", "argument prefix");
                     :tSTAR2 # TODO: rename
                   end

          self.yacc_value = "*"
          self.fix_arg_lex_state
          return result
        end
      elsif src.check(/\</) then
        if src.scan(/\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(/\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(/\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(/\<\</) then
          if (!in_lex_state?(:expr_dot, :expr_class) &&
              !is_end? &&
              (!is_arg? || space_seen)) then
            tok = self.heredoc_identifier
            return tok if tok
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(/\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(/\>/) then
        if src.scan(/\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(/\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(/\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(/\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(/\`/) then
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\00""]
        return :tXSTRING_BEG
      elsif src.scan(/\?/) then

        if is_end? then
          self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
          self.tern.push true
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(/\s|\v/) then
          unless is_arg? then
            c2 = { " " => 's',
                  "\n" => 'n',
                  "\t" => 't',
                  "\v" => 'v',
                  "\r" => 'r',
                  "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
          self.tern.push true
          self.yacc_value = "?"
          return :tEH
        elsif src.check(/\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.tern.push true
          self.yacc_value = "?"
          return :tEH
        end

        c = if src.scan(/\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end

        if version == 18 then
          self.yacc_value = c[0].ord & 0xff
          return :tINTEGER
        else
          self.yacc_value = c
          return :tSTRING
        end
      elsif src.check(/\&/) then
        if src.scan(/\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(/\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(/&/) then
          result = if is_arg? && space_seen &&
                       !src.check(/\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif in_lex_state? :expr_beg, :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(/\//) then
        if is_beg? then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(/\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if is_arg? && space_seen then
          unless src.scan(/\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(/\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(/\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(/\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(/\~/) then
        if in_lex_state? :expr_fname, :expr_dot then
          src.scan(/@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(/\\/) then
        if src.scan(/\r?\n/) then
          self.lineno = nil
          self.space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(/\%/) then
        if is_beg? then
          return parse_quote
        end

        if src.scan(/\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        return parse_quote if is_arg? && space_seen && ! src.check(/\s/)

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(/\$/) then
        if src.scan(/(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(/\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(/\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(/\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(/\$\W|\$\z/) then # TODO: remove?
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(/\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(/\_/) then
        if src.beginning_of_line? && src.scan(/\__END__(\r?\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(/\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(/\0004|\0032|\0000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      rb_compile_error "Invalid char #{src.rest[0].chr} in expression" unless
        src.check IDENT_RE
    end

    self.token = src.matched if self.src.scan IDENT_RE

    return process_token(command_state)
  end
end
yylex_paren18() click to toggle source
# File lib/ruby_lexer.rb, line 1333
##
# Picks the token type for an opening parenthesis (the "18" flavour of
# the paren handler -- presumably used when version == 18; confirm
# against the caller).
#
# Always flags command_start. Depending on the current lex state and
# whether whitespace preceded the paren, it may also push +false+ onto
# the tern stack and emit a warning.
#
# Returns :tLPAREN, :tLPAREN_ARG or :tLPAREN2.

def yylex_paren18
  self.command_start = true

  # At the beginning (or middle) of an expression: a grouping paren.
  return :tLPAREN if in_lex_state? :expr_beg, :expr_mid

  if !space_seen then
    # No whitespace before the paren.
    self.tern.push false
  elsif in_lex_state? :expr_cmdarg then
    return :tLPAREN_ARG
  elsif in_lex_state? :expr_arg then
    self.tern.push false
    warning "don't put space before argument parentheses"
  end

  :tLPAREN2
end
yylex_paren19() click to toggle source
# File lib/ruby_lexer.rb, line 1353
##
# Picks the token type for an opening parenthesis (the "19" flavour of
# the paren handler). Consults is_beg? first, then is_space_arg?.
#
# Returns :tLPAREN, :tLPAREN_ARG or :tLPAREN2.

def yylex_paren19
  return :tLPAREN     if is_beg?
  return :tLPAREN_ARG if is_space_arg?

  :tLPAREN2 # plain '(' in parse.y
end
yylex_string() click to toggle source
# File lib/ruby_lexer.rb, line 1502
##
# Lexes the next piece of the string-like construct described by
# lex_strterm. A strterm whose first element is :heredoc is handed to
# #heredoc; anything else goes to #parse_string.
#
# When the produced token closes the literal (:tSTRING_END or
# :tREGEXP_END), lineno and lex_strterm are cleared and the lexer moves
# to :expr_end.
#
# Returns the token produced by the delegate.

def yylex_string
  strterm = lex_strterm
  tok     = strterm[0] == :heredoc ? self.heredoc(strterm) : self.parse_string(strterm)

  case tok
  when :tSTRING_END, :tREGEXP_END then
    # Literal finished: drop the string-terminator state.
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  tok
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.