module Language.Haskell.Exts.InternalLexer (Token(..), showToken, lexer, topLexer) where
import Language.Haskell.Exts.ParseMonad
import Language.Haskell.Exts.SrcLoc
import Language.Haskell.Exts.Comments
import Language.Haskell.Exts.Extension
import Language.Haskell.Exts.ExtScheme
import Data.Char
import Data.Ratio
import Data.List (intersperse)
import Control.Monad (when)
data Token
= VarId String
| QVarId (String,String)
| IDupVarId (String)
| ILinVarId (String)
| ConId String
| QConId (String,String)
| DVarId [String]
| VarSym String
| ConSym String
| QVarSym (String,String)
| QConSym (String,String)
| IntTok (Integer, String)
| FloatTok (Rational, String)
| Character (Char, String)
| StringTok (String, String)
| IntTokHash (Integer, String)
| WordTokHash (Integer, String)
| FloatTokHash (Rational, String)
| DoubleTokHash (Rational, String)
| CharacterHash (Char, String)
| StringHash (String, String)
| LeftParen
| RightParen
| LeftHashParen
| RightHashParen
| LeftCurlyBar
| RightCurlyBar
| SemiColon
| LeftCurly
| RightCurly
| VRightCurly
| LeftSquare
| RightSquare
| Comma
| Underscore
| BackQuote
| Dot
| DotDot
| Colon
| DoubleColon
| Equals
| Backslash
| Bar
| LeftArrow
| RightArrow
| At
| Tilde
| DoubleArrow
| Minus
| Exclamation
| Star
| LeftArrowTail
| RightArrowTail
| LeftDblArrowTail
| RightDblArrowTail
| THExpQuote
| THPatQuote
| THDecQuote
| THTypQuote
| THCloseQuote
| THIdEscape (String)
| THParenEscape
| THVarQuote
| THTyQuote
| THQuasiQuote (String,String)
| RPGuardOpen
| RPGuardClose
| RPCAt
| XCodeTagOpen
| XCodeTagClose
| XStdTagOpen
| XStdTagClose
| XCloseTagOpen
| XEmptyTagClose
| XChildTagOpen
| XPCDATA String
| XRPatOpen
| XRPatClose
| PragmaEnd
| RULES
| INLINE Bool
| INLINE_CONLIKE
| SPECIALISE
| SPECIALISE_INLINE Bool
| SOURCE
| DEPRECATED
| WARNING
| SCC
| GENERATED
| CORE
| UNPACK
| OPTIONS (Maybe String,String)
| LANGUAGE
| ANN
| KW_As
| KW_By
| KW_Case
| KW_Class
| KW_Data
| KW_Default
| KW_Deriving
| KW_Do
| KW_MDo
| KW_Else
| KW_Family
| KW_Forall
| KW_Group
| KW_Hiding
| KW_If
| KW_Import
| KW_In
| KW_Infix
| KW_InfixL
| KW_InfixR
| KW_Instance
| KW_Let
| KW_Module
| KW_NewType
| KW_Of
| KW_Proc
| KW_Rec
| KW_Then
| KW_Type
| KW_Using
| KW_Where
| KW_Qualified
| KW_Foreign
| KW_Export
| KW_Safe
| KW_Unsafe
| KW_Threadsafe
| KW_Interruptible
| KW_StdCall
| KW_CCall
| KW_CPlusPlus
| KW_DotNet
| KW_Jvm
| KW_Js
| KW_CApi
| EOF
deriving (Eq,Show)
reserved_ops :: [(String,(Token, Maybe ExtScheme))]
reserved_ops = [
( "..", (DotDot, Nothing) ),
( ":", (Colon, Nothing) ),
( "::", (DoubleColon, Nothing) ),
( "=", (Equals, Nothing) ),
( "\\", (Backslash, Nothing) ),
( "|", (Bar, Nothing) ),
( "<-", (LeftArrow, Nothing) ),
( "->", (RightArrow, Nothing) ),
( "@", (At, Nothing) ),
( "@:", (RPCAt, Just (Any [RegularPatterns])) ),
( "~", (Tilde, Nothing) ),
( "=>", (DoubleArrow, Nothing) ),
( "*", (Star, Just (Any [KindSignatures])) ),
( "-<", (LeftArrowTail, Just (Any [Arrows])) ),
( ">-", (RightArrowTail, Just (Any [Arrows])) ),
( "-<<", (LeftDblArrowTail, Just (Any [Arrows])) ),
( ">>-", (RightDblArrowTail, Just (Any [Arrows])) ),
( "\x2190", (LeftArrow, Just (Any [UnicodeSyntax])) ),
( "\x2192", (RightArrow, Just (Any [UnicodeSyntax])) ),
( "\x21d2", (DoubleArrow, Just (Any [UnicodeSyntax])) ),
( "\x2237", (DoubleColon, Just (Any [UnicodeSyntax])) ),
( "\x2919", (LeftArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x291a", (RightArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x291b", (LeftDblArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x291c", (RightDblArrowTail, Just (All [UnicodeSyntax, Arrows])) ),
( "\x2605", (Star, Just (All [UnicodeSyntax, KindSignatures])) )
]
special_varops :: [(String,(Token, Maybe ExtScheme))]
special_varops = [
( ".", (Dot, Just (Any [ExplicitForAll, ExistentialQuantification])) ),
( "-", (Minus, Nothing) ),
( "!", (Exclamation, Nothing) )
]
reserved_ids :: [(String,(Token, Maybe ExtScheme))]
reserved_ids = [
( "_", (Underscore, Nothing) ),
( "by", (KW_By, Just (Any [TransformListComp])) ),
( "case", (KW_Case, Nothing) ),
( "class", (KW_Class, Nothing) ),
( "data", (KW_Data, Nothing) ),
( "default", (KW_Default, Nothing) ),
( "deriving", (KW_Deriving, Nothing) ),
( "do", (KW_Do, Nothing) ),
( "else", (KW_Else, Nothing) ),
( "family", (KW_Family, Just (Any [TypeFamilies])) ),
( "forall", (KW_Forall, Just (Any [ExplicitForAll, ExistentialQuantification])) ),
( "group", (KW_Group, Just (Any [TransformListComp])) ),
( "if", (KW_If, Nothing) ),
( "import", (KW_Import, Nothing) ),
( "in", (KW_In, Nothing) ),
( "infix", (KW_Infix, Nothing) ),
( "infixl", (KW_InfixL, Nothing) ),
( "infixr", (KW_InfixR, Nothing) ),
( "instance", (KW_Instance, Nothing) ),
( "let", (KW_Let, Nothing) ),
( "mdo", (KW_MDo, Just (Any [RecursiveDo])) ),
( "module", (KW_Module, Nothing) ),
( "newtype", (KW_NewType, Nothing) ),
( "of", (KW_Of, Nothing) ),
( "proc", (KW_Proc, Just (Any [Arrows])) ),
( "rec", (KW_Rec, Just (Any [Arrows])) ),
( "then", (KW_Then, Nothing) ),
( "type", (KW_Type, Nothing) ),
( "using", (KW_Using, Just (Any [TransformListComp])) ),
( "where", (KW_Where, Nothing) ),
( "foreign", (KW_Foreign, Just (Any [ForeignFunctionInterface])) ),
( "\x2200", (KW_Forall, Just (All [UnicodeSyntax, ExplicitForAll])) )
]
special_varids :: [(String,(Token, Maybe ExtScheme))]
special_varids = [
( "as", (KW_As, Nothing) ),
( "qualified", (KW_Qualified, Nothing) ),
( "hiding", (KW_Hiding, Nothing) ),
( "export", (KW_Export, Just (Any [ForeignFunctionInterface])) ),
( "safe", (KW_Safe, Just (Any [ForeignFunctionInterface])) ),
( "unsafe", (KW_Unsafe, Just (Any [ForeignFunctionInterface])) ),
( "threadsafe", (KW_Threadsafe, Just (Any [ForeignFunctionInterface])) ),
( "interruptible", (KW_Interruptible, Just (Any [InterruptibleFFI])) ),
( "stdcall", (KW_StdCall, Just (Any [ForeignFunctionInterface])) ),
( "ccall", (KW_CCall, Just (Any [ForeignFunctionInterface])) ),
( "cplusplus", (KW_CPlusPlus, Just (Any [ForeignFunctionInterface])) ),
( "dotnet", (KW_DotNet, Just (Any [ForeignFunctionInterface])) ),
( "jvm", (KW_Jvm, Just (Any [ForeignFunctionInterface])) ),
( "js", (KW_Js, Just (Any [ForeignFunctionInterface])) ),
( "capi", (KW_CApi, Just (Any [CApiFFI])) )
]
pragmas :: [(String,Token)]
pragmas = [
( "rules", RULES ),
( "inline", INLINE True ),
( "noinline", INLINE False ),
( "notinline", INLINE False ),
( "specialise", SPECIALISE ),
( "specialize", SPECIALISE ),
( "source", SOURCE ),
( "deprecated", DEPRECATED ),
( "warning", WARNING ),
( "ann", ANN ),
( "scc", SCC ),
( "generated", GENERATED ),
( "core", CORE ),
( "unpack", UNPACK ),
( "language", LANGUAGE ),
( "options", OPTIONS undefined )
]
isIdent, isHSymbol :: Char -> Bool
isIdent c = isAlpha c || isDigit c || c == '\'' || c == '_'
isHSymbol c = c `elem` ":!#%&*./?@\\-" || ((isSymbol c || isPunctuation c) && not (c `elem` "(),;[]`{}_\"'"))
matchChar :: Char -> String -> Lex a ()
matchChar c msg = do
s <- getInput
if null s || head s /= c then fail msg else discard 1
lexer :: (Loc Token -> P a) -> P a
lexer = runL topLexer
topLexer :: Lex a (Loc Token)
topLexer = do
b <- pullCtxtFlag
if b then
setBOL >> getSrcLocL >>= \l -> return (Loc (mkSrcSpan l l) VRightCurly)
else do
bol <- checkBOL
(bol, ws) <- lexWhiteSpace bol
ec <- getExtContext
case ec of
Just ChildCtxt | not bol && ws -> getSrcLocL >>= \l -> return $ Loc (mkSrcSpan l l) $ XPCDATA " "
_ -> do startToken
sl <- getSrcLocL
t <- if bol then lexBOL
else lexToken
el <- getSrcLocL
return $ Loc (mkSrcSpan sl el) t
lexWhiteSpace :: Bool -> Lex a (Bool, Bool)
lexWhiteSpace bol = do
s <- getInput
ignL <- ignoreLinePragmasL
case s of
'{':'-':'#':rest | isRecognisedPragma rest -> return (bol, False)
| isLinePragma rest && not ignL -> do
(l, fn) <- lexLinePragma
setSrcLineL l
setLineFilenameL fn
lexWhiteSpace True
'{':'-':_ -> do
loc <- getSrcLocL
discard 2
(bol, c) <- lexNestedComment bol ""
loc2 <- getSrcLocL
pushComment $ Comment True (mkSrcSpan loc loc2) (reverse c)
(bol, _) <- lexWhiteSpace bol
return (bol, True)
'-':'-':s | all (== '-') (takeWhile isHSymbol s) -> do
loc <- getSrcLocL
discard 2
dashes <- lexWhile (== '-')
rest <- lexWhile (/= '\n')
s' <- getInput
loc2 <- getSrcLocL
let com = Comment False (mkSrcSpan loc loc2) $ dashes ++ rest
case s' of
[] -> pushComment com >> return (False, True)
_ -> do
pushComment com
lexNewline
lexWhiteSpace True
return (True, True)
'\n':_ -> do
lexNewline
lexWhiteSpace True
return (True, True)
'\t':_ -> do
lexTab
(bol, _) <- lexWhiteSpace bol
return (bol, True)
c:_ | isSpace c -> do
discard 1
(bol, _) <- lexWhiteSpace bol
return (bol, True)
_ -> return (bol, False)
isRecognisedPragma, isLinePragma :: String -> Bool
isRecognisedPragma str = let pragma = map toLower . takeWhile isAlphaNum . dropWhile isSpace $ str
in case lookup pragma pragmas of
Nothing -> False
_ -> True
isLinePragma str = let pragma = map toLower . takeWhile isAlphaNum . dropWhile isSpace $ str
in case pragma of
"line" -> True
_ -> False
lexLinePragma :: Lex a (Int, String)
lexLinePragma = do
discard 3
lexWhile isSpace
discard 4
lexWhile isSpace
i <- lexWhile isDigit
when (null i) $ fail "Improperly formatted LINE pragma"
lexWhile isSpace
matchChar '"' "Improperly formatted LINE pragma"
fn <- lexWhile (/= '"')
matchChar '"' "Impossible - lexLinePragma"
lexWhile isSpace
mapM (flip matchChar "Improperly formatted LINE pragma") "#-}"
lexNewline
return (read i, fn)
lexNestedComment :: Bool -> String -> Lex a (Bool, String)
lexNestedComment bol str = do
s <- getInput
case s of
'-':'}':_ -> discard 2 >> return (bol, str)
'{':'-':_ -> do
discard 2
(bol, c) <- lexNestedComment bol ("-{" ++ str)
lexNestedComment bol ("}-" ++ c )
'\t':_ -> lexTab >> lexNestedComment bol ('\t':str)
'\n':_ -> lexNewline >> lexNestedComment True ('\n':str)
c:_ -> discard 1 >> lexNestedComment bol (c:str)
[] -> fail "Unterminated nested comment"
lexBOL :: Lex a Token
lexBOL = do
pos <- getOffside
case pos of
LT -> do
setBOL
popContextL "lexBOL"
return VRightCurly
EQ ->
return SemiColon
GT -> lexToken
lexToken :: Lex a Token
lexToken = do
ec <- getExtContext
case ec of
Just HarpCtxt -> lexHarpToken
Just TagCtxt -> lexTagCtxt
Just CloseTagCtxt -> lexCloseTagCtxt
Just ChildCtxt -> lexChildCtxt
Just CodeTagCtxt -> lexCodeTagCtxt
_ -> lexStdToken
lexChildCtxt :: Lex a Token
lexChildCtxt = do
s <- getInput
case s of
'<':'%':'>':_ -> do discard 3
pushExtContextL ChildCtxt
return XChildTagOpen
'<':'%':_ -> do discard 2
pushExtContextL CodeTagCtxt
return XCodeTagOpen
'<':'/':_ -> do discard 2
popExtContextL "lexChildCtxt"
pushExtContextL CloseTagCtxt
return XCloseTagOpen
'<':'[':_ -> do discard 2
pushExtContextL HarpCtxt
return XRPatOpen
'<':_ -> do discard 1
pushExtContextL TagCtxt
return XStdTagOpen
_ -> lexPCDATA
lexPCDATA :: Lex a Token
lexPCDATA = do
s <- getInput
case s of
[] -> return EOF
_ -> case s of
'\n':_ -> do
x <- lexNewline >> lexPCDATA
case x of
XPCDATA p -> return $ XPCDATA $ '\n':p
EOF -> return EOF
'<':_ -> return $ XPCDATA ""
_ -> do let pcd = takeWhile (\c -> not $ elem c "<\n") s
l = length pcd
discard l
x <- lexPCDATA
case x of
XPCDATA pcd' -> return $ XPCDATA $ pcd ++ pcd'
EOF -> return EOF
lexCodeTagCtxt :: Lex a Token
lexCodeTagCtxt = do
s <- getInput
case s of
'%':'>':_ -> do discard 2
popExtContextL "lexCodeTagContext"
return XCodeTagClose
_ -> lexStdToken
lexCloseTagCtxt :: Lex a Token
lexCloseTagCtxt = do
s <- getInput
case s of
'%':'>':_ -> do discard 2
popExtContextL "lexCloseTagCtxt"
return XCodeTagClose
'>':_ -> do discard 1
popExtContextL "lexCloseTagCtxt"
return XStdTagClose
_ -> lexStdToken
lexTagCtxt :: Lex a Token
lexTagCtxt = do
s <- getInput
case s of
'/':'>':_ -> do discard 2
popExtContextL "lexTagCtxt: Empty tag"
return XEmptyTagClose
'>':_ -> do discard 1
popExtContextL "lexTagCtxt: Standard tag"
pushExtContextL ChildCtxt
return XStdTagClose
_ -> lexStdToken
lexHarpToken :: Lex a Token
lexHarpToken = do
s <- getInput
case s of
']':'>':_ -> do discard 2
popExtContextL "lexHarpToken"
return XRPatClose
_ -> lexStdToken
lexStdToken :: Lex a Token
lexStdToken = do
s <- getInput
exts <- getExtensionsL
let intHash = lexHash IntTok IntTokHash (Right WordTokHash)
case s of
[] -> return EOF
'0':c:d:_ | toLower c == 'o' && isOctDigit d -> do
discard 2
(n, str) <- lexOctal
con <- intHash
return (con (n, '0':c:str))
| toLower c == 'x' && isHexDigit d -> do
discard 2
(n, str) <- lexHexadecimal
con <- intHash
return (con (n, '0':c:str))
'?':c:_ | isLower c && ImplicitParams `elem` exts -> do
discard 1
id <- lexWhile isIdent
return $ IDupVarId id
'%':c:_ | isLower c && ImplicitParams `elem` exts -> do
discard 1
id <- lexWhile isIdent
return $ ILinVarId id
'(':'|':c:_ | RegularPatterns `elem` exts && not (isHSymbol c) ->
do discard 2
return RPGuardOpen
'|':')':_ | RegularPatterns `elem` exts ->
do discard 2
return RPGuardClose
'[':'|':_ | TemplateHaskell `elem` exts -> do
discard 2
return $ THExpQuote
'[':c:'|':_ | c == 'e' && TemplateHaskell `elem` exts -> do
discard 3
return $ THExpQuote
| c == 'p' && TemplateHaskell `elem` exts -> do
discard 3
return THPatQuote
| c == 'd' && TemplateHaskell `elem` exts -> do
discard 3
return THDecQuote
| c == 't' && TemplateHaskell `elem` exts -> do
discard 3
return THTypQuote
'[':'$':c:_ | isLower c && QuasiQuotes `elem` exts ->
discard 2 >> lexQuasiQuote
'[':c:s | isLower c && QuasiQuotes `elem` exts && case dropWhile isIdent s of { '|':_ -> True;_->False} ->
discard 1 >> lexQuasiQuote
'|':']':_ | TemplateHaskell `elem` exts -> do
discard 2
return THCloseQuote
'$':c:_ | isLower c && TemplateHaskell `elem` exts -> do
discard 1
id <- lexWhile isIdent
return $ THIdEscape id
| c == '(' && TemplateHaskell `elem` exts -> do
discard 2
return THParenEscape
'<':'%':c:_ | XmlSyntax `elem` exts -> do
case c of
'>' -> do discard 3
pushExtContextL ChildCtxt
return XChildTagOpen
_ -> do discard 2
pushExtContextL CodeTagCtxt
return XCodeTagOpen
'<':c:_ | isAlpha c && XmlSyntax `elem` exts -> do
discard 1
pushExtContextL TagCtxt
return XStdTagOpen
'(':'#':c:_ | UnboxedTuples `elem` exts && not (isHSymbol c) -> do discard 2 >> return LeftHashParen
'#':')':_ | UnboxedTuples `elem` exts -> do discard 2 >> return RightHashParen
'{':'|':_ | Generics `elem` exts -> do discard 2 >> return LeftCurlyBar
'|':'}':_ | Generics `elem` exts -> do discard 2 >> return RightCurlyBar
'{':'-':'#':_ -> do discard 3 >> lexPragmaStart
'#':'-':'}':_ -> do discard 3 >> return PragmaEnd
c:_ | isDigit c -> lexDecimalOrFloat
| isUpper c -> lexConIdOrQual ""
| isLower c || c == '_' -> do
idents <- lexIdents
case idents of
[ident] -> case lookup ident (reserved_ids ++ special_varids) of
Just (keyword, scheme) -> do
if isEnabled scheme exts
then flagKW keyword >> return keyword
else return $ VarId ident
Nothing -> return $ VarId ident
_ -> return $ DVarId idents
| isHSymbol c -> do
sym <- lexWhile isHSymbol
return $ case lookup sym (reserved_ops ++ special_varops) of
Just (t , scheme) ->
if isEnabled scheme exts
then t
else case c of
':' -> ConSym sym
_ -> VarSym sym
Nothing -> case c of
':' -> ConSym sym
_ -> VarSym sym
| otherwise -> do
discard 1
case c of
'(' -> return LeftParen
')' -> return RightParen
',' -> return Comma
';' -> return SemiColon
'[' -> return LeftSquare
']' -> return RightSquare
'`' -> return BackQuote
'{' -> do
pushContextL NoLayout
return LeftCurly
'}' -> do
popContextL "lexStdToken"
return RightCurly
'\'' -> lexCharacter
'"' -> lexString
_ -> fail ("Illegal character \'" ++ show c ++ "\'\n")
where lexIdents :: Lex a [String]
lexIdents = do
ident <- lexWhile isIdent
s <- getInput
exts <- getExtensionsL
case s of
'-':c:_ | XmlSyntax `elem` exts && isAlpha c -> do
discard 1
idents <- lexIdents
return $ ident : idents
'#':_ | MagicHash `elem` exts -> do
discard 1
return [ident ++ "#"]
_ -> return [ident]
lexQuasiQuote :: Lex a Token
lexQuasiQuote = do
ident <- lexWhile isIdent
matchChar '|' "Malformed quasi-quote quoter"
body <- lexQQBody
return $ THQuasiQuote (ident, body)
lexQQBody :: Lex a String
lexQQBody = do
s <- getInput
case s of
'\\':']':_ -> do discard 2
str <- lexQQBody
return (']':str)
'\\':'|':_ -> do discard 2
str <- lexQQBody
return ('|':str)
'|':']':_ -> discard 2 >> return ""
'|':_ -> do discard 1
str <- lexQQBody
return ('|':str)
']':_ -> do discard 1
str <- lexQQBody
return (']':str)
'\\':_ -> do discard 1
str <- lexQQBody
return ('\\':str)
'\n':_ -> do lexNewline
str <- lexQQBody
return ('\n':str)
[] -> fail "Unexpected end of input while lexing quasi-quoter"
_ -> do str <- lexWhile (not . (`elem` "\\|\n"))
rest <- lexQQBody
return (str++rest)
lexPragmaStart :: Lex a Token
lexPragmaStart = do
lexWhile isSpace
pr <- lexWhile isAlphaNum
case lookup (map toLower pr) pragmas of
Just (INLINE True) -> do
s <- getInput
case map toLower s of
'_':'c':'o':'n':'l':'i':'k':'e':_ -> do
discard 8
return $ INLINE_CONLIKE
_ -> return $ INLINE True
Just SPECIALISE -> do
s <- getInput
case dropWhile isSpace $ map toLower s of
'i':'n':'l':'i':'n':'e':_ -> do
lexWhile isSpace
discard 6
return $ SPECIALISE_INLINE True
'n':'o':'i':'n':'l':'i':'n':'e':_ -> do
lexWhile isSpace
discard 8
return $ SPECIALISE_INLINE False
'n':'o':'t':'i':'n':'l':'i':'n':'e':_ -> do
lexWhile isSpace
discard 9
return $ SPECIALISE_INLINE False
_ -> return SPECIALISE
Just (OPTIONS _) -> do
s <- getInput
case s of
'_':_ -> do
discard 1
com <- lexWhile isIdent
rest <- lexRawPragma
return $ OPTIONS (Just com, rest)
x:_ | isSpace x -> do
rest <- lexRawPragma
return $ OPTIONS (Nothing, rest)
_ -> fail "Malformed Options pragma"
Just p -> return p
_ -> fail "Internal error: Unrecognised recognised pragma"
lexRawPragma :: Lex a String
lexRawPragma = do
rpr <- lexRawPragmaAux
return $ dropWhile isSpace rpr
where lexRawPragmaAux = do
rpr <- lexWhile (/='#')
s <- getInput
case s of
'#':'-':'}':_ -> return rpr
"" -> fail "End-of-file inside pragma"
_ -> do
discard 1
rpr' <- lexRawPragma
return $ rpr ++ '#':rpr'
lexDecimalOrFloat :: Lex a Token
lexDecimalOrFloat = do
ds <- lexWhile isDigit
rest <- getInput
exts <- getExtensionsL
case rest of
('.':d:_) | isDigit d -> do
discard 1
frac <- lexWhile isDigit
let num = parseInteger 10 (ds ++ frac)
decimals = toInteger (length frac)
(exponent, estr) <- do
rest2 <- getInput
case rest2 of
'e':_ -> lexExponent
'E':_ -> lexExponent
_ -> return (0,"")
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con ((num%1) * 10^^(exponent decimals), ds ++ '.':frac ++ estr)
e:_ | toLower e == 'e' -> do
(exponent, estr) <- lexExponent
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con ((parseInteger 10 ds%1) * 10^^exponent, ds ++ estr)
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds))
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds))
_ -> return (IntTok (parseInteger 10 ds, ds))
where
lexExponent :: Lex a (Integer, String)
lexExponent = do
(e:r) <- getInput
discard 1
case r of
'+':d:_ | isDigit d -> do
discard 1
(n, str) <- lexDecimal
return (n, e:'+':str)
'-':d:_ | isDigit d -> do
discard 1
(n, str) <- lexDecimal
return (negate n, e:'-':str)
d:_ | isDigit d -> lexDecimal >>= \(n,str) -> return (n, e:str)
_ -> fail "Float with missing exponent"
lexHash :: (b -> Token) -> (b -> Token) -> Either String (b -> Token) -> Lex a (b -> Token)
lexHash a b c = do
exts <- getExtensionsL
if MagicHash `elem` exts
then do
r <- getInput
case r of
'#':'#':_ -> case c of
Right c -> discard 2 >> return c
Left s -> fail s
'#':_ -> discard 1 >> return b
_ -> return a
else return a
lexConIdOrQual :: String -> Lex a Token
lexConIdOrQual qual = do
con <- lexWhile isIdent
let conid | null qual = ConId con
| otherwise = QConId (qual,con)
qual' | null qual = con
| otherwise = qual ++ '.':con
just_a_conid <- alternative (return conid)
rest <- getInput
exts <- getExtensionsL
case rest of
'.':c:_
| isLower c || c == '_' -> do
discard 1
ident <- lexWhile isIdent
s <- getInput
exts <- getExtensionsL
ident' <- case s of
'#':_ | MagicHash `elem` exts -> discard 1 >> return (ident ++ "#")
_ -> return ident
case lookup ident' reserved_ids of
Just (_,scheme) | isEnabled scheme exts -> just_a_conid
_ -> return (QVarId (qual', ident'))
| isUpper c -> do
discard 1
lexConIdOrQual qual'
| isHSymbol c -> do
discard 1
sym <- lexWhile isHSymbol
exts <- getExtensionsL
case lookup sym reserved_ops of
Just (_,scheme) | isEnabled scheme exts -> just_a_conid
_ -> return $ case c of
':' -> QConSym (qual', sym)
_ -> QVarSym (qual', sym)
'#':cs
| null cs ||
not (isHSymbol $ head cs) &&
not (isIdent $ head cs) && MagicHash `elem` exts -> do
discard 1
case conid of
ConId con -> return $ ConId $ con ++ "#"
QConId (q,con) -> return $ QConId (q,con ++ "#")
_ -> return conid
lexCharacter :: Lex a Token
lexCharacter = do
s <- getInput
exts <- getExtensionsL
case s of
'\'':_ | TemplateHaskell `elem` exts -> discard 1 >> return THTyQuote
'\\':_ -> do
(c,raw) <- lexEscape
matchQuote
con <- lexHash Character CharacterHash
(Left "Double hash not available for character literals")
return (con (c, '\\':raw))
c:'\'':_ -> do
discard 2
con <- lexHash Character CharacterHash
(Left "Double hash not available for character literals")
return (con (c, [c]))
_ | TemplateHaskell `elem` exts -> return THVarQuote
_ -> fail "Improper character constant or misplaced \'"
where matchQuote = matchChar '\'' "Improperly terminated character constant"
lexString :: Lex a Token
lexString = loop ("","")
where
loop (s,raw) = do
r <- getInput
exts <- getExtensionsL
case r of
'\\':'&':_ -> do
discard 2
loop (s, '&':'\\':raw)
'\\':c:_ | isSpace c -> do
discard 1
wcs <- lexWhiteChars
matchChar '\\' "Illegal character in string gap"
loop (s, '\\':reverse wcs ++ '\\':raw)
| otherwise -> do
(ce, str) <- lexEscape
loop (ce:s, reverse str ++ '\\':raw)
'"':'#':_ | MagicHash `elem` exts -> do
discard 2
return (StringHash (reverse s, reverse raw))
'"':_ -> do
discard 1
return (StringTok (reverse s, reverse raw))
c:_ | c /= '\n' -> do
discard 1
loop (c:s, c:raw)
_ -> fail "Improperly terminated string"
lexWhiteChars :: Lex a String
lexWhiteChars = do
s <- getInput
case s of
'\n':_ -> do
lexNewline
wcs <- lexWhiteChars
return $ '\n':wcs
'\t':_ -> do
lexTab
wcs <- lexWhiteChars
return $ '\t':wcs
c:_ | isSpace c -> do
discard 1
wcs <- lexWhiteChars
return $ c:wcs
_ -> return ""
lexEscape :: Lex a (Char, String)
lexEscape = do
discard 1
r <- getInput
case r of
'a':_ -> discard 1 >> return ('\a', "a")
'b':_ -> discard 1 >> return ('\b', "b")
'f':_ -> discard 1 >> return ('\f', "f")
'n':_ -> discard 1 >> return ('\n', "n")
'r':_ -> discard 1 >> return ('\r', "r")
't':_ -> discard 1 >> return ('\t', "t")
'v':_ -> discard 1 >> return ('\v', "v")
'\\':_ -> discard 1 >> return ('\\', "\\")
'"':_ -> discard 1 >> return ('\"', "\"")
'\'':_ -> discard 1 >> return ('\'', "\'")
'^':c:_ -> discard 2 >> cntrl c
'N':'U':'L':_ -> discard 3 >> return ('\NUL', "NUL")
'S':'O':'H':_ -> discard 3 >> return ('\SOH', "SOH")
'S':'T':'X':_ -> discard 3 >> return ('\STX', "STX")
'E':'T':'X':_ -> discard 3 >> return ('\ETX', "ETX")
'E':'O':'T':_ -> discard 3 >> return ('\EOT', "EOT")
'E':'N':'Q':_ -> discard 3 >> return ('\ENQ', "ENQ")
'A':'C':'K':_ -> discard 3 >> return ('\ACK', "ACK")
'B':'E':'L':_ -> discard 3 >> return ('\BEL', "BEL")
'B':'S':_ -> discard 2 >> return ('\BS', "BS")
'H':'T':_ -> discard 2 >> return ('\HT', "HT")
'L':'F':_ -> discard 2 >> return ('\LF', "LF")
'V':'T':_ -> discard 2 >> return ('\VT', "VT")
'F':'F':_ -> discard 2 >> return ('\FF', "FF")
'C':'R':_ -> discard 2 >> return ('\CR', "CR")
'S':'O':_ -> discard 2 >> return ('\SO', "SO")
'S':'I':_ -> discard 2 >> return ('\SI', "SI")
'D':'L':'E':_ -> discard 3 >> return ('\DLE', "DLE")
'D':'C':'1':_ -> discard 3 >> return ('\DC1', "DC1")
'D':'C':'2':_ -> discard 3 >> return ('\DC2', "DC2")
'D':'C':'3':_ -> discard 3 >> return ('\DC3', "DC3")
'D':'C':'4':_ -> discard 3 >> return ('\DC4', "DC4")
'N':'A':'K':_ -> discard 3 >> return ('\NAK', "NAK")
'S':'Y':'N':_ -> discard 3 >> return ('\SYN', "SYN")
'E':'T':'B':_ -> discard 3 >> return ('\ETB', "ETB")
'C':'A':'N':_ -> discard 3 >> return ('\CAN', "CAN")
'E':'M':_ -> discard 2 >> return ('\EM', "EM")
'S':'U':'B':_ -> discard 3 >> return ('\SUB', "SUB")
'E':'S':'C':_ -> discard 3 >> return ('\ESC', "ESC")
'F':'S':_ -> discard 2 >> return ('\FS', "FS")
'G':'S':_ -> discard 2 >> return ('\GS', "GS")
'R':'S':_ -> discard 2 >> return ('\RS', "RS")
'U':'S':_ -> discard 2 >> return ('\US', "US")
'S':'P':_ -> discard 2 >> return ('\SP', "SP")
'D':'E':'L':_ -> discard 3 >> return ('\DEL', "DEL")
'o':c:_ | isOctDigit c -> do
discard 1
(n, raw) <- lexOctal
n <- checkChar n
return (n, 'o':raw)
'x':c:_ | isHexDigit c -> do
discard 1
(n, raw) <- lexHexadecimal
n <- checkChar n
return (n, 'x':raw)
c:_ | isDigit c -> do
(n, raw) <- lexDecimal
n <- checkChar n
return (n, raw)
_ -> fail "Illegal escape sequence"
where
checkChar n | n <= 0x10FFFF = return (chr (fromInteger n))
checkChar _ = fail "Character constant out of range"
cntrl :: Char -> Lex a (Char, String)
cntrl c | c >= '@' && c <= '_' = return (chr (ord c ord '@'), '^':c:[])
cntrl _ = fail "Illegal control character"
lexOctal :: Lex a (Integer, String)
lexOctal = do
ds <- lexWhile isOctDigit
return (parseInteger 8 ds, ds)
lexHexadecimal :: Lex a (Integer, String)
lexHexadecimal = do
ds <- lexWhile isHexDigit
return (parseInteger 16 ds, ds)
lexDecimal :: Lex a (Integer, String)
lexDecimal = do
ds <- lexWhile isDigit
return (parseInteger 10 ds, ds)
parseInteger :: Integer -> String -> Integer
parseInteger radix ds =
foldl1 (\n d -> n * radix + d) (map (toInteger . digitToInt) ds)
flagKW :: Token -> Lex a ()
flagKW t = do
when (t `elem` [KW_Do, KW_MDo]) $ do
exts <- getExtensionsL
when (NondecreasingIndentation `elem` exts) $
flagDo
showToken :: Token -> String
showToken t = case t of
VarId s -> s
QVarId (q,s) -> q ++ '.':s
IDupVarId s -> '?':s
ILinVarId s -> '%':s
ConId s -> s
QConId (q,s) -> q ++ '.':s
DVarId ss -> concat $ intersperse "-" ss
VarSym s -> s
ConSym s -> s
QVarSym (q,s) -> q ++ '.':s
QConSym (q,s) -> q ++ '.':s
IntTok (_, s) -> s
FloatTok (_, s) -> s
Character (_, s) -> '\'':s ++ "'"
StringTok (_, s) -> '"':s ++ "\""
IntTokHash (_, s) -> s ++ "#"
WordTokHash (_, s) -> s ++ "##"
FloatTokHash (_, s) -> s ++ "#"
DoubleTokHash (_, s) -> s ++ "##"
CharacterHash (_, s) -> '\'':s ++ "'#"
StringHash (_, s) -> '"':s ++ "\"#"
LeftParen -> "("
RightParen -> ")"
LeftHashParen -> "(#"
RightHashParen -> "#)"
LeftCurlyBar -> "{|"
RightCurlyBar -> "|}"
SemiColon -> ";"
LeftCurly -> "{"
RightCurly -> "}"
VRightCurly -> "virtual }"
LeftSquare -> "["
RightSquare -> "]"
Comma -> ","
Underscore -> "_"
BackQuote -> "`"
Dot -> "."
DotDot -> ".."
Colon -> ":"
DoubleColon -> "::"
Equals -> "="
Backslash -> "\\"
Bar -> "|"
LeftArrow -> "<-"
RightArrow -> "->"
At -> "@"
Tilde -> "~"
DoubleArrow -> "=>"
Minus -> "-"
Exclamation -> "!"
Star -> "*"
LeftArrowTail -> ">-"
RightArrowTail -> "-<"
LeftDblArrowTail -> ">>-"
RightDblArrowTail -> "-<<"
THExpQuote -> "[|"
THPatQuote -> "[p|"
THDecQuote -> "[d|"
THTypQuote -> "[t|"
THCloseQuote -> "|]"
THIdEscape s -> '$':s
THParenEscape -> "$("
THVarQuote -> "'"
THTyQuote -> "''"
THQuasiQuote (n,q) -> "[$" ++ n ++ "|" ++ q ++ "]"
RPGuardOpen -> "(|"
RPGuardClose -> "|)"
RPCAt -> "@:"
XCodeTagOpen -> "<%"
XCodeTagClose -> "%>"
XStdTagOpen -> "<"
XStdTagClose -> ">"
XCloseTagOpen -> "</"
XEmptyTagClose -> "/>"
XPCDATA s -> "PCDATA " ++ s
XRPatOpen -> "<["
XRPatClose -> "]>"
PragmaEnd -> "#-}"
RULES -> "{-# RULES"
INLINE b -> "{-# " ++ if b then "INLINE" else "NOINLINE"
INLINE_CONLIKE -> "{-# " ++ "INLINE_CONLIKE"
SPECIALISE -> "{-# SPECIALISE"
SPECIALISE_INLINE b -> "{-# SPECIALISE " ++ if b then "INLINE" else "NOINLINE"
SOURCE -> "{-# SOURCE"
DEPRECATED -> "{-# DEPRECATED"
WARNING -> "{-# WARNING"
SCC -> "{-# SCC"
GENERATED -> "{-# GENERATED"
CORE -> "{-# CORE"
UNPACK -> "{-# UNPACK"
OPTIONS (mt,s) -> "{-# OPTIONS" ++ maybe "" (':':) mt ++ " ..."
LANGUAGE -> "{-# LANGUAGE"
ANN -> "{-# ANN"
KW_As -> "as"
KW_By -> "by"
KW_Case -> "case"
KW_Class -> "class"
KW_Data -> "data"
KW_Default -> "default"
KW_Deriving -> "deriving"
KW_Do -> "do"
KW_MDo -> "mdo"
KW_Else -> "else"
KW_Family -> "family"
KW_Forall -> "forall"
KW_Group -> "group"
KW_Hiding -> "hiding"
KW_If -> "if"
KW_Import -> "import"
KW_In -> "in"
KW_Infix -> "infix"
KW_InfixL -> "infixl"
KW_InfixR -> "infixr"
KW_Instance -> "instance"
KW_Let -> "let"
KW_Module -> "module"
KW_NewType -> "newtype"
KW_Of -> "of"
KW_Proc -> "proc"
KW_Rec -> "rec"
KW_Then -> "then"
KW_Type -> "type"
KW_Using -> "using"
KW_Where -> "where"
KW_Qualified -> "qualified"
KW_Foreign -> "foreign"
KW_Export -> "export"
KW_Safe -> "safe"
KW_Unsafe -> "unsafe"
KW_Threadsafe -> "threadsafe"
KW_Interruptible -> "interruptible"
KW_StdCall -> "stdcall"
KW_CCall -> "ccall"
EOF -> "EOF"