— | Library of parser definition and operations.
module ParserLib where
import Control.Applicative
import Data.Char
import Data.Functor
import Data.List
data Parser a = MkParser (String -> Maybe (String, a))
— Function from input string to:
—
— * Nothing, if failure (syntax error);
— * Just (unconsumed input, answer), if success.
unParser :: Parser a -> String -> Maybe (String, a)
unParser (MkParser sf) = sf
— Monadic Parsing in Haskell uses [] instead of Maybe to support ambiguous
— grammars and multiple answers.
— | Use a parser on an input string.
runParser :: Parser a -> String -> Maybe a
runParser (MkParser sf) inp = case sf inp of
Nothing -> Nothing
Just (_, a) -> Just a
— OR: fmap (\(_,a) -> a) (sf inp)
—————————–
— Character-level primitives
—————————–
— | Read a character and return. Failure if input is empty.
anyChar :: Parser Char
anyChar = MkParser sf
where
sf “” = Nothing
sf (c:cs) = Just (cs, c)
— | Read a character and check against the given character.
char :: Char -> Parser Char
char wanted = MkParser sf
where
sf (c:cs) | c == wanted = Just (cs, c)
sf _ = Nothing
— | Read a character and check against the given predicate.
satisfy :: (Char -> Bool) -> Parser Char
satisfy pred = MkParser sf
where
sf (c:cs) | pred c = Just (cs, c)
sf _ = Nothing
— | Expect the input to be empty.
eof :: Parser ()
eof = MkParser sf
where
sf “” = Just (“”, ())
sf _ = Nothing
— But you have to compose smaller parsers to build larger parsers and to return
— more interesting answers, e.g., abstract syntax trees.
—
— This is what fmap, pure, <*>, >>= are for. And there are more…
instance Functor Parser where
— fmap :: (a -> b) -> Parser a -> Parser b
fmap f (MkParser sf) = MkParser sfb
where
sfb inp = case sf inp of
Nothing -> Nothing
Just (rest, a) -> Just (rest, f a)
— OR: fmap (\(rest, a) -> (rest, f a)) (sf inp)
instance Applicative Parser where
— pure :: a -> Parser a
pure a = MkParser (\inp -> Just (inp, a))
— liftA2 :: (a -> b -> c) -> Parser a -> Parser b -> Parser c
— Consider the 1st parser to be stage 1, 2nd parser stage 2.
liftA2 op (MkParser sf1) p2 = MkParser g
where
g inp = case sf1 inp of
Nothing -> Nothing
Just (middle, a) ->
case unParser p2 middle of
Nothing -> Nothing
Just (rest, b) -> Just (rest, op a b)
instance Monad Parser where
— return :: a -> Parser a
return = pure
— (>>=) :: Parser a -> (a -> Parser b) -> Parser b
MkParser sf1 >>= k = MkParser g
where
g inp = case sf1 inp of
Nothing -> Nothing
Just (rest, a) -> unParser (k a) rest
instance Alternative Parser where
— empty :: Parser a
— Always fail.
empty = MkParser (\_ -> Nothing)
— (<|>) :: Parser a -> Parser a -> Parser a
— Try the 1st one. If success, done; if failure, do the 2nd one
MkParser sf1 <|> p2 = MkParser g
where
g inp = case sf1 inp of
Nothing -> unParser p2 inp
j -> j — the Just case
— many :: Parser a -> Parser [a]
— 0 or more times, maximum munch, collect the answers into a list.
— Can use default implementation. And it goes as:
many p = some p <|> pure []
— How to make sense of it: To repeat 0 or more times, first try 1 or more
— times! If that fails, then we know it’s 0 times, and the answer is the
— empty list.
— some :: Parser a -> Parser [a]
— 1 or more times, maximum munch, collect the answers into a list.
— Can use default implementation. And it goes as:
some p = liftA2 (:) p (many p)
— How to make sense of it: To repeat 1 or more times, do 1 time, then 0 or
— more times! Use liftA2 to chain up and collect answers.
— | Space or tab or newline (unix and windows).
whitespace :: Parser Char
whitespace = satisfy (\c -> c `elem` [‘\t’, ‘\n’, ‘\r’, ‘ ‘])
— | Consume zero or more whitespaces, maximum munch.
whitespaces :: Parser String
whitespaces = many whitespace
————————-
— Token-level primitives
————————-
— | Read a natural number (non-negative integer), then skip trailing spaces.
natural :: Parser Integer
natural = fmap read (some (satisfy isDigit)) <* whitespaces
-- read :: Read a => String -> a
— For converting string to your data type, assuming valid string. Integer
— is an instance of Read, and our string is valid, so we can use read.
— | Read an identifier, then skip trailing spaces. Disallow the listed keywords.
identifier :: [String] -> Parser String
identifier keywords =
satisfy isAlpha
>>= \c -> many (satisfy isAlphaNum)
>>= \cs -> whitespaces
>> let str = c:cs
in if str `elem` keywords then empty else return str
— | Read the wanted keyword, then skip trailing spaces.
keyword :: String -> Parser String
keyword wanted =
satisfy isAlpha
>>= \c -> many (satisfy isAlphaNum)
>>= \cs -> whitespaces
*> if c:cs == wanted then return wanted else empty
— | Read something that looks like an operator, then skip trailing spaces.
anyOperator :: Parser String
anyOperator = some (satisfy symChar) <* whitespaces
where
symChar c = c `elem` ":=/<>&|+-*%!\\”
— | Read the wanted operator, then skip trailing spaces.
operator :: String -> Parser String
operator wanted =
anyOperator
>>= \sym -> if sym == wanted then return wanted else empty
— | Open and close parentheses.
openParen, closeParen :: Parser Char
openParen = char ‘(‘ <* whitespaces
closeParen = char ')' <* whitespaces
---------------------------------------
-- Canned solutions for infix operators
---------------------------------------
-- | One or more operands separated by an operator. Apply the operator(s) in a
-- right-associating way.
chainr1 :: Parser a -- ^ operand parser
-> Parser (a -> a -> a) — ^ operator parser
-> Parser a — ^ whole answer
chainr1 getArg getOp = liftA2 link
getArg
(optional
(liftA2 (,) getOp (chainr1 getArg getOp)))
where
link x Nothing = x
link x (Just (op,y)) = op x y
— | One or more operands separated by an operator. Apply the operator(s) in a
— left-associating way.
chainl1 :: Parser a — ^ operand parser
-> Parser (a -> a -> a) — ^ operator parser
-> Parser a — ^ whole answer
chainl1 getArg getOp = liftA2 link
getArg
(many (liftA2 (,) getOp getArg))
where
link x opys = foldl (\accum (op,y) -> op accum y) x opys