{-# OPTIONS  #-}
-----------------------------------------------------------------------------
-- |
-- Module      : Language.Python.Version2.Lexer
-- Copyright   : (c) 2009 Bernie Pope 
-- License     : BSD-style
-- Maintainer  : bjpop@csse.unimelb.edu.au
-- Stability   : experimental
-- Portability : ghc
--
-- Lexical analysis for Python version 2.x programs. 
-- See: <http://docs.python.org/2.6/reference/lexical_analysis.html>.
-----------------------------------------------------------------------------

module Language.Python.Version2.Lexer (
   -- * Lexical analysis
   lex, 
   lexOneToken) where

import Prelude hiding (lex)
import Language.Python.Version2.Parser.Lexer (lexToken, initStartCodeStack)
import Language.Python.Common.Token as Token 
import Language.Python.Common.SrcLocation (initialSrcLocation)
import Language.Python.Common.ParserMonad 
       (ParseState (input), P, runParser, execParser, ParseError, initialState)

-- | Parse a string into a list of Python Tokens, or return an error.
--
-- The whole input is tokenised by running the token-accumulating 'lexer'
-- with 'execParser', starting from a parser state built from the source
-- name, the source text and 'initStartCodeStack'.
lex :: String -- ^ The input stream (python source code).
    -> String -- ^ The name of the python source (filename or input device).
    -> Either ParseError [Token] -- ^ An error or a list of tokens.
lex source srcName =
   execParser lexer state
   where
   -- Location used as the starting point of the parser state, derived from
   -- the source name.
   initLoc = initialSrcLocation srcName
   -- Parser state before any input has been consumed.
   state :: ParseState
   state = initialState initLoc source initStartCodeStack

-- | Try to lex the first token in an input string. Return either a parse error
-- or a pair containing the next token and the rest of the input after the token.
--
-- The remaining input is read from the 'input' field of the parser state
-- returned by 'runParser' after a single call to 'lexToken'.
lexOneToken :: String -- ^ The input stream (python source code).
            -> String -- ^ The name of the python source (filename or input device).
            -> Either ParseError (Token, String) -- ^ An error or the next token and the rest of the input after the token.
lexOneToken source srcName = do
   -- A 'Left' parse error from 'runParser' is propagated unchanged by the
   -- 'Either' monad.
   (tok, finalState) <- runParser lexToken state
   return (tok, input finalState)
   where
   -- Location used as the starting point of the parser state, derived from
   -- the source name.
   initLoc = initialSrcLocation srcName
   -- Parser state before any input has been consumed.
   state :: ParseState
   state = initialState initLoc source initStartCodeStack

-- | Lex tokens repeatedly with 'lexToken' until an 'EOFToken' is produced,
-- returning the tokens seen before it in the order they were lexed. The
-- 'EOFToken' itself is not included in the result.
lexer :: P [Token]
lexer = loop []
   where
   -- The accumulator holds the tokens lexed so far, most recent first; it is
   -- reversed once when the end of input is reached.
   loop :: [Token] -> P [Token]
   loop toks = do
      tok <- lexToken
      case tok of
         EOFToken {} -> return (reverse toks)
         _ -> loop (tok : toks)