Haskell写的Parser
发布时间:2020-12-14 04:30:25 所属栏目:大数据 来源:网络整理
导读:干货第二波,Haskell实现的Parser,支持运算语句和备注等,输出可以作为Interpreter的输入 Parser combinator?选用的是 ReadP. ? module Parser.Impl whereimport SubsAstimport Text.ParserCombinators.ReadP as Pimport Data.Charimport Control.Applicativ
干货第二波,Haskell实现的Parser,支持运算语句和备注等,输出可以作为Interpreter的输入 Parser combinator?选用的是 ReadP. ? module Parser.Impl where import SubsAst import Text.ParserCombinators.ReadP as P import Data.Char import Control.Applicative data ParseError = ParseError String deriving (Show,Eq) -- allowed chars which should be allowed in pString allowedChars :: ReadP Char allowedChars = do _ <- char ‘‘ c <- satisfy (`elem` "‘nt") case c of ‘n‘ -> return ‘n‘ ‘t‘ -> return ‘t‘ x -> return x -- keywords which should not be used as a var name no when assign or get var reserved :: [String] reserved = ["if","for","of","true","false","undefined"] -- helper function for read chars of variable name checkChar :: Char -> Char -> Bool checkChar sym x = x == sym -- return Var pIdent :: ReadP Expr pIdent = token $ do fist <- satisfy isLetter send <- munch (x -> isLetter x || isDigit x || checkChar ‘_‘ x) let idt = fist : send if idt `notElem` reserved then return $ Var idt else fail "keyword can not be used as ident" -- doing get Ident for assign pIdget :: ReadP Ident pIdget = do fist <- satisfy isLetter send <- munch (x -> isLetter x || isDigit x || checkChar ‘_‘ x) let idt = fist : send if idt `notElem` reserved then return idt else fail "keyword can not be used as ident" -- for readin number pNumber :: ReadP Expr pNumber = do sym <- option ‘ ‘ (char ‘-‘) number <- munch1 isDigit if length number <= 8 then return (Number (read (sym : number) :: Int)) else fail "invalid number" -- for readin string pString :: ReadP Expr pString = do _ <- char ‘‘‘ s <- P.many (allowedChars <|> satisfy (x -> isPrint x && notElem x "t‘‘") <|> (do _ <- char ‘‘ _ <- char ‘n‘ return ‘ ‘)) _ <- char ‘‘‘ return (String s) -- comment pComment :: ReadP String pComment = between (string "//") (char ‘n‘) (munch ( /= ‘n‘)) -- token -- we keep the skipMany1 pComment for skip comments before token but -- it might cause time out sometimes token :: ReadP a -> ReadP a token p = do skipSpaces <|> skipMany1 pComment a <- p skipSpaces <|> skipMany1 pComment return a -- symbol is using for read in a symbol or keyword such as "for" and "if" symbol :: String -> ReadP String symbol s = token $ string s -- read true pTrue :: ReadP Expr pTrue = do _ <- token $ string "true" return TrueConst -- read false pFalse :: ReadP Expr pFalse = do _ <- token $ string "false" return FalseConst -- read Undefined pUndefined :: ReadP Expr pUndefined = do _ <- token $ string "undefined" return Undefined -- stands for the Eq of our grammar tree pAssign :: ReadP Expr pAssign = do idt <- token pIdget _ <- symbol "=" val <- token pExpr0 return (Assign idt val) -- array pArray :: ReadP Expr pArray = (do _ <- symbol "[" exps <- pExprs _ <- symbol "]" return (Array exps)) <++ (do _ <- symbol "[" _ <- symbol "]" return (Array [Undefined])) -- stands for the Expr of root of grammar tree pExpr :: ReadP Expr pExpr = (do a <- token pExpr1 b <- token pouterComma return (Comma a b)) <++ token pAssign <++ token pExpr1 -- stands for the Expr0 of grammar tree,with precidence of 0 pExpr0 :: ReadP Expr pExpr0 = (do _ <- symbol "=" token pExpr0) <++ token pExpr1 -- stands for the Expr1 of grammar tree,with precidence of 1 pExpr1 :: ReadP Expr pExpr1 = (do exp2 <- token pExpr2 _ <- symbol "===" exp1 <- token pExpr2 pOpExpr1 (Call "===" [exp2,exp1])) <++ (do exp2 <- token pExpr2 _ <- symbol "<" exp1 <- token pExpr2 pOpExpr1 (Call "<" [exp2,exp1])) <++ token pAssign <++ token pExpr2 -- to help pExpr1 deal with left associativity pOpExpr1 :: Expr -> ReadP Expr pOpExpr1 a = (do _ <- symbol "===" exp2 <- token pExpr1 return (Call "===" [a,exp2])) <++ (do _ <- symbol "<" exp2 <- token pExpr1 return (Call "<" [a,exp2])) <++ return a -- stands for the Expr2 of grammar tree,with precidence of 2 pExpr2 :: ReadP Expr pExpr2 = (do a <- token pExpr3 _ <- symbol "+" b <- token pExpr3 pOpExpr2 (Call "+" [a,b])) <++ (do a <- token pExpr3 _ <- symbol "-" b <- token pExpr3 pOpExpr2 (Call "-" [a,b])) <++ token pExpr3 -- to help pExpr2 deal with left associativity pOpExpr2 :: Expr -> ReadP Expr pOpExpr2 a = (do _ <- symbol "+" exp2 <- token pExpr2 return (Call "+" [a,exp2])) <++ (do _ <- symbol "-" exp2 <- token pExpr2 return (Call "-" [a,exp2])) <++ return a -- stands for the Expr3 of grammar tree,with precidence of 3 pExpr3 :: ReadP Expr pExpr3 = (do a <- token pExpr4 _ <- symbol "*" b <- token pExpr4 pOpExpr3 (Call "*" [a,b])) <++ (do a <- token pExpr4 _ <- symbol "%" b <- token pExpr4 pOpExpr3 (Call "%" [a,b])) <++ token pExpr4 -- to help pExpr3 deal with left associativity pOpExpr3 :: Expr -> ReadP Expr pOpExpr3 a = (do _ <- symbol "*" exp2 <- token pExpr3 return (Call "*" [a,exp2])) <++ (do _ <- symbol "%" exp2 <- token pExpr3 return (Call "%" [a,exp2])) <++ return a -- stands for the Expr4 of grammar tree,with precidence of 4 pExpr4 :: ReadP Expr pExpr4 = (do _ <- symbol "(" e <- token pExpr _ <- symbol ")" return e) <++ pArray <++(do _ <- symbol "[" e <- token pComprFor _ <- symbol "]" return e) <++ (do idt <- token pIdget _ <- symbol "(" e <- token pExprs _ <- symbol ")" return $ Call idt e) <++ pNumber <++ pIdent <++ pString <++ pTrue <++ pFalse <++ pUndefined -- stands for the OuterComma of the grammar tree pouterComma :: ReadP Expr pouterComma = do _ <- symbol "," token pExpr -- helpfunction for pcommaExprs and pExprs commaHelper :: ReadP [Expr] commaHelper = do exp1 <- token pExpr1 com <- token pcommaExprs return (exp1:com) -- stands for the commaExprs of the grammar tree pcommaExprs :: ReadP [Expr] pcommaExprs = (do _ <- symbol "," commaHelper) <++ (do _ <- symbol "," exp1 <- token pExpr1 return [exp1]) -- stands for the Exprs of the grammar tree pExprs :: ReadP [Expr] pExprs = (do exp1 <- token pExpr1 return [exp1]) <++ commaHelper -- stands for ArrayFor of the grammar tree pArrayFor :: ReadP ArrayCompr pArrayFor = do _ <- symbol "for" _ <- symbol "(" id‘ <- token pIdget _ <- symbol "of" exp1 <- token pExpr1 _ <- symbol ")" ac <- token pArrayCompr return (ACFor id‘ exp1 ac) -- stands for the ArrayIf of the grammar tree pArrayIf :: ReadP ArrayCompr pArrayIf = do _ <- symbol "if" _ <- symbol "(" exp1 <- token pExpr1 _ <- symbol ")" ac <- token pArrayCompr return (ACIf exp1 ac) -- organize the ACIf,ACFor and ACBody pArrayCompr :: ReadP ArrayCompr pArrayCompr = token pArrayIf <++ token pArrayFor <++ (do ex <- token pExpr1 return (ACBody ex)) -- stands for the ArrayCompr of grammar tree pComprFor :: ReadP Expr pComprFor = do ar <- pArrayFor return (Compr ar) -- out put the parse result parseString :: String -> Either ParseError Expr parseString str = if null (readP_to_S pExpr str) then Left(ParseError "Invalid expression") else (do let legalstr = [x | x <- readP_to_S pExpr str,snd x == ""] if null legalstr then Left( ParseError "Invalid expression") else (do let stri = fst (head legalstr) case stri of String s -> Right (String [x | x <- s,x `notElem` "NUL"]) a -> Right a)) (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |