\begin{code}
{-# OPTIONS_GHC -Wall #-}
{-# LANGUAGE InstanceSigs #-}
module Mar04 where
import Data.Char (digitToInt, isAlpha, isDigit, isSpace)
import Prelude hiding (rem)
\end{code}
Learning objectives:
\begin{itemize}
\item parsing
\end{itemize}
Parsing! As a list of successful parses
Works well for when you allow ambiguous grammars…
\begin{code}
type Parse a b = [a] -> [(b, [a])]
\end{code}
But for A3, we don’t. So we can, and do, simplify to:
\begin{code}
data Parsed a
= Failure
| Success {value :: a, remainingInput :: String}
deriving (Show, Eq, Ord)
newtype Parser a = MkP {applyParser :: String -> Parsed a}
\end{code}
We
\begin{enumerate}
\item Specialize to parsing strings
\item Only succeed if there is a unique parse
\item Use a special type to make things clearer
\end{enumerate}
The Functor instance for Parsed is the ‘same’ as for Maybe:
\begin{code}
instance Functor Parsed where
fmap _ Failure = Failure
fmap f (Success v rest) = Success (f v) rest
\end{code}
That for Parser then “maps inside”.
\begin{code}
instance Functor Parser where
fmap f p = MkP $ fmap f . applyParser p
\end{code}
\begin{code}
instance Applicative Parser where
pure :: a -> Parser a
pure = MkP . Success
— funP : a parser function
— aP : a parser
funP <*> aP = MkP $ \input -> case applyParser funP input of
Failure -> Failure
Success f rem -> f <$> applyParser aP rem
\end{code}
\begin{code}
instance Monad Parser where
return = pure
— `bind` – often called `andThen`
(>>=) :: Parser a -> (a -> Parser b) -> Parser b
p >>= f = MkP $ \input -> case applyParser p input of
Failure -> Failure
Success a rem -> applyParser (f a) rem
\end{code}
Get value result from applying a parser, throw away the rest.
\begin{code}
runParser :: Parser a -> String -> a
runParser p inp = case applyParser p inp of
Failure -> error “Parser failed!”
Success a _ -> a
\end{code}
A collection of useful parsers:
\begin{code}
— Parser that always fails.
— i.e. none
failure :: Parser a
failure = MkP $ const Failure
— Parse the first character of the input if it’s non empty, otherwise fails
item :: Parser Char
item = MkP $ \inp -> case inp of
[] -> Failure
(c : cs) -> Success c cs
— Parse a character satisfying predicate p
— i.e. spot
sat :: (Char -> Bool) -> Parser Char
sat p = do
c <- item
if p c
then return c
else failure
-- Look for a specific character as the next item in the input string
-- i.e. token
char :: Char -> Parser Char
— char c = sat (== c)
char c = do
x <- item
if x == c
then return c
else failure
-- Look for a digit, returning the associated numeric value if successful
digit :: Parser Int
digit = do
d <- sat isDigit
return $ digitToInt d
-- digit = sat isDigit >>= (return . digitToInt)
— orElse (hoogle “Alternative”)
(<|>) :: Parser a -> Parser a -> Parser a
p <|> q = MkP $ \input -> case applyParser p input of
Failure -> applyParser q input
Success a rem -> Success a rem
many :: Parser a -> Parser [a]
many p = get <|> pure []
where
get = do
x <- p
xs <- many p
return (x : xs)
-- Parse a list of something with given separator (requires at least one)
someWith :: Parser s -> Parser a -> Parser [a]
someWith separator p = do
x <- p
xs <- many (do _ <- separator; p)
return (x : xs)
\end{code}
And now for a concrete example:
\begin{code}
data Expr = Lit Int | Var String | Op Ops Expr Expr
deriving Show
data Ops = Add | Sub | Mul | Div
deriving Show
charToOp :: Char -> Ops
charToOp ‘+’ = Add
charToOp ‘-‘ = Sub
charToOp ‘*’ = Mul
charToOp ‘/’ = Div
isOp :: Char -> Bool
isOp c = c == ‘+’ || c == ‘-‘ || c == ‘*’ || c == ‘/’
parsePExpr :: Parser Expr
parsePExpr =
do _ <- char '('
res <- parseExpr
_ <- char ')'
return res
parseBinExpr :: Parser Expr
parseBinExpr =
do e1 <- parseLit <|> parseExpr <|> parsePExpr
bop <- sat isOp
e2 <- parseLit <|> parseExpr<|> parsePExpr
return (Op (charToOp bop) e1 e2)
parseExpr :: Parser Expr
parseExpr = parseBinExpr <|> parsePExpr <|> parseLit
parseLit :: Parser Expr
parseLit = fmap (\x -> Lit $ read x) (many $ sat isDigit)
\end{code}