{--------------------------------------------------------------------- A HASKELL LIBRARY OF MONADIC PARSER COMBINATORS 17th April 1997 Graham Hutton Erik Meijer University of Nottingham University of Utrecht This Haskell 1.3 library is derived from our forthcoming JFP article "Monadic Parsing in Haskell". The library also includes a few extra combinators that were not discussed in the article for reasons of space: o force (used to make "many" deliver results lazily); o digit, lower, upper, letter, alphanum (useful parsers); o ident, nat, int (useful token parsers). ---------------------------------------------------------------------} module Parselib where import Control.Monad --import Data.MonadPlus import Data.Char import Debug.Trace infixr 5 +++ -- Monad of parsers: ------------------------------------------------- newtype Parser a = Parser (String -> [(a,String)]) instance Monad Parser where return a = Parser (\cs -> [(a,cs)]) p >>= f = Parser (\cs -> concat [parse (f a) cs' | (a,cs') <- parse p cs]) --instance MonadZero Parser where -- zero = Parser (\cs -> []) instance MonadPlus Parser where p `mplus` q = Parser (\cs -> parse p cs ++ parse q cs) mzero = Parser (\cs -> []) -- Other parsing primitives: ----------------------------------------- parse :: Parser a -> String -> [(a,String)] parse (Parser p) = p readparseM msg p s = ( readparse . parse p ) s where readparse [(res,"")] = return res readparse (x:xs) = {-trace msg' -} fail $ msg' where msg' = msg ++ " readParseM, multiple parses: " ++ s readparse [] = {-trace msg' -} fail $ msg' where msg' = msg ++ " readParseM, no parse: " ++ s -- useful for debugging testStringParser p s = testStringParser' p s >>= putStrLn testStringParser' p s = readparseM "testStringParser" p s testPTestParser p pts s = testPTestParser' p pts s >>= putStrLn . show -- testPTestParser' condParser "=blee" "blee" testPTestParser' :: (Monad m) => Parser (String -> Bool) -> String -> String -> m Bool testPTestParser' p pts s = readparseM "testPTestParser" p pts >>= \pt -> ( return . pt ) s item :: Parser Char item = Parser (\cs -> case cs of "" -> [] (c:cs) -> [(c,cs)]) sat :: (Char -> Bool) -> Parser Char sat p = do c <- item if p c then return c else mzero satall ps = do c <- item if and (map (\p -> p c) ps) then return c else mzero -- Efficiency improving combinators: --------------------------------- force :: Parser a -> Parser a force p = Parser (\cs -> let xs = parse p cs in (fst (head xs), snd (head xs)) : tail xs) (+++) :: Parser a -> Parser a -> Parser a p +++ q = Parser (\cs -> case parse (p `mplus` q) cs of [] -> [] (x:xs) -> [x]) -- Recursion combinators: -------------------------------------------- string :: String -> Parser String string "" = return "" string (c:cs) = do {char c; string cs; return (c:cs)} many :: Parser a -> Parser [a] many p = force (many1 p +++ return []) many1 :: Parser a -> Parser [a] many1 p = do {a <- p; as <- many p; return (a:as)} sepby :: Parser a -> Parser b -> Parser [a] p `sepby` sep = (p `sepby1` sep) +++ return [] sepby1 :: Parser a -> Parser b -> Parser [a] p `sepby1` sep = do {a <- p; as <- many (do {sep; p}); return (a:as)} chainl :: Parser a -> Parser (a -> a -> a) -> a -> Parser a chainl p op a = (p `chainl1` op) +++ return a chainl1 :: Parser a -> Parser (a -> a -> a) -> Parser a p `chainl1` op = do {a <- p; rest a} where rest a = do {f <- op; b <- p; rest (f a b)} +++ return a {- ops :: [(Parser a, b)] -> Parser b ops xs = do (p,op) <- xs op <- p return $ foldr1 (+++) op -} -- Useful parsers: --------------------------------------------------- char :: Char -> Parser Char char c = sat (c ==) anychar = sat (const True) notchar c = sat (c /=) notchars cs = satall ( map ( \c -> (c /=) ) cs) digit :: Parser Int digit = do {c <- sat isDigit; return (ord c - ord '0')} lower :: Parser Char lower = sat isLower upper :: Parser Char upper = sat isUpper letter :: Parser Char letter = sat isAlpha alphanum :: Parser Char alphanum = sat isAlphaNum symb :: String -> Parser String symb cs = token (string cs) ident :: [String] -> Parser String ident css = do cs <- token identifier guard (not (elem cs css)) return cs identifier :: Parser String identifier = do {c <- lower; cs <- many alphanum; return (c:cs)} nat :: Parser Int nat = token natural natural :: Parser Int natural = digit `chainl1` return (\m n -> 10*m + n) int :: Parser Int int = token integer integer :: Parser Int integer = do {char '-'; n <- natural; return (-n)} +++ nat -- Lexical combinators: ---------------------------------------------- -- match zero or more spaces (many matches the empty string, like * in pcre) space :: Parser String space = many (sat isSpace) token :: Parser a -> Parser a token p = do {a <- p; space; return a} apply :: Parser a -> String -> [(a,String)] apply p = parse (do {space; p}) -- Example parser for arithmetic expressions: ------------------------ -- -- expr :: Parser Int -- addop :: Parser (Int -> Int -> Int) -- mulop :: Parser (Int -> Int -> Int) -- -- expr = term `chainl1` addop -- term = factor `chainl1` mulop -- factor = token digit +++ do {symb "("; n <- expr; symb ")"; return n} -- -- addop = do {symb "+"; return (+)} +++ do {symb "-"; return (-)} -- mulop = do {symb "*"; return (*)} +++ do {symb "/"; return (div)} -- ---------------------------------------------------------------------- word = many letter bracket :: Parser a -> Parser b -> Parser c -> Parser b bracket open contents close = do startBracket <- open contents <- contents endBracket <- close return contents --testinspaceE = mapM_ ( readparseM "err" inspace >>= putStrLn . show ) ["blee"] --maybesome p = many1 p +++ ( string "") --maybespace = maybesome (string " ") --mInspace contents = contents +++ space contents +++ contents space +++ inspace contents ------------- monadic parser combinators paper example testCalcE = readparseM "err" calcExpression "1+2 - 2*(3+4)" >>= putStrLn . show calcExpression :: Parser Int calcExpression = ( term `chainl1` multop ) `chainl1` addop where term = token $ nat +++ bracket ( char '(' ) calcExpression ( char ')' ) addop = ops [( token $ char '+', (+)), ( token $ char '-', (-))] multop = ops [(token $ char '*',(*))] ops :: [(Parser a, (b->b->b))] -> Parser (b->b->b) ops xs = ( foldr1 ( +++ ) . map f ) xs where f (p,op) = do p return op