From 9c6ccbad476738d2b46f9855360b7593fecba9ab Mon Sep 17 00:00:00 2001 From: Dougal Date: Wed, 13 Dec 2023 23:01:52 -0500 Subject: Tweak binop source ids so that colon, dollar etc are considered atomic --- src/lib/AbstractSyntax.hs | 44 +++++++++++++++--------------- src/lib/ConcreteSyntax.hs | 64 +++++++++++++++++++++++--------------------- src/lib/Lexing.hs | 12 ++++++--- src/lib/SourceIdTraversal.hs | 6 ++++- src/lib/Types/Source.hs | 5 ++-- 5 files changed, 73 insertions(+), 58 deletions(-) diff --git a/src/lib/AbstractSyntax.hs b/src/lib/AbstractSyntax.hs index e7a396ce..a169ce4d 100644 --- a/src/lib/AbstractSyntax.hs +++ b/src/lib/AbstractSyntax.hs @@ -195,7 +195,7 @@ withTrailingConstraints :: GroupW -> (GroupW -> SyntaxM (UAnnBinder VoidS VoidS)) -> SyntaxM (Nest UAnnBinder VoidS VoidS) withTrailingConstraints g cont = case g of - WithSrcs _ _ (CBin (WithSrc _ Pipe) lhs c) -> do + WithSrcs _ _ (CBin Pipe lhs c) -> do Nest (UAnnBinder expl (WithSrcB sid b) ann cs) bs <- withTrailingConstraints lhs cont s <- case b of UBindSource s -> return s @@ -253,7 +253,7 @@ explicitBindersOptAnn (WithSrcs _ _ bs) = -- Binder pattern with an optional type annotation patOptAnn :: GroupW -> SyntaxM (UPat VoidS VoidS, Maybe (UType VoidS)) -patOptAnn (WithSrcs _ _ (CBin (WithSrc _ Colon) lhs typeAnn)) = (,) <$> pat lhs <*> (Just <$> expr typeAnn) +patOptAnn (WithSrcs _ _ (CBin Colon lhs typeAnn)) = (,) <$> pat lhs <*> (Just <$> expr typeAnn) patOptAnn (WithSrcs _ _ (CParens [g])) = patOptAnn g patOptAnn g = (,Nothing) <$> pat g @@ -267,7 +267,7 @@ uBinder (WithSrcs sid _ b) = case b of tyOptPat :: GroupW -> SyntaxM (UAnnBinder VoidS VoidS) tyOptPat grpTop@(WithSrcs sid _ grp) = case grp of -- Named type - CBin (WithSrc _ Colon) lhs typeAnn -> + CBin Colon lhs typeAnn -> UAnnBinder Explicit <$> uBinder lhs <*> (UAnn <$> expr typeAnn) <*> pure [] -- Binder in grouping parens. CParens [g] -> tyOptPat g @@ -285,7 +285,7 @@ casePat = \case pat :: GroupW -> SyntaxM (UPat VoidS VoidS) pat (WithSrcs sid _ grp) = WithSrcB sid <$> case grp of - CBin (WithSrc _ DepComma) lhs rhs -> do + CBin DepComma lhs rhs -> do lhs' <- pat lhs rhs' <- pat rhs return $ UPatDepPair $ PairB lhs' rhs' @@ -316,8 +316,8 @@ pat (WithSrcs sid _ grp) = WithSrcB sid <$> case grp of tyOptBinder :: Explicitness -> GroupW -> SyntaxM (UAnnBinder VoidS VoidS) tyOptBinder expl (WithSrcs sid sids grp) = case grp of - CBin (WithSrc _ Pipe) _ rhs -> throw (getSrcId rhs) UnexpectedConstraint - CBin (WithSrc _ Colon) name ty -> do + CBin Pipe _ rhs -> throw (getSrcId rhs) UnexpectedConstraint + CBin Colon name ty -> do b <- uBinder name ann <- UAnn <$> expr ty return $ UAnnBinder expl b ann [] @@ -327,7 +327,7 @@ tyOptBinder expl (WithSrcs sid sids grp) = case grp of binderOptTy :: Explicitness -> GroupW -> SyntaxM (UAnnBinder VoidS VoidS) binderOptTy expl = \case - WithSrcs _ _ (CBin (WithSrc _ Colon) name ty) -> do + WithSrcs _ _ (CBin Colon name ty) -> do b <- uBinder name ann <- UAnn <$> expr ty return $ UAnnBinder expl b ann [] @@ -336,7 +336,7 @@ binderOptTy expl = \case return $ UAnnBinder expl b UNoAnn [] binderReqTy :: Explicitness -> GroupW -> SyntaxM (UAnnBinder VoidS VoidS) -binderReqTy expl (WithSrcs _ _ (CBin (WithSrc _ Colon) name ty)) = do +binderReqTy expl (WithSrcs _ _ (CBin Colon name ty)) = do b <- uBinder name ann <- UAnn <$> expr ty return $ UAnnBinder expl b ann [] @@ -347,7 +347,7 @@ argList gs = partitionEithers <$> mapM singleArg gs singleArg :: GroupW -> SyntaxM (Either (UExpr VoidS) (UNamedArg VoidS)) singleArg = \case - WithSrcs _ _ (CBin (WithSrc _ CSEqual) lhs rhs) -> Right <$> + WithSrcs _ _ (CBin CSEqual lhs rhs) -> Right <$> ((,) <$> withoutSrc <$> identifier "named argument" lhs <*> expr rhs) g -> Left <$> expr g @@ -417,7 +417,7 @@ blockDecls (d:ds) = do -- === Concrete to abstract syntax of expressions === expr :: GroupW -> SyntaxM (UExpr VoidS) -expr (WithSrcs sid _ grp) = WithSrcE sid <$> case grp of +expr (WithSrcs sid sids grp) = WithSrcE sid <$> case grp of CLeaf x -> leaf sid x CPrim prim xs -> UPrim prim <$> mapM expr xs CParens [g] -> do @@ -449,9 +449,9 @@ expr (WithSrcs sid _ grp) = WithSrcE sid <$> case grp of args' <- mapM expr args return $ UTabApp f args' _ -> error "unexpected postfix group (should be ruled out at grouping stage)" - CBin (WithSrc opSid op) lhs rhs -> case op of - Dollar -> extendAppRight <$> expr lhs <*> expr rhs - Pipe -> extendAppLeft <$> expr lhs <*> expr rhs + CBin op lhs rhs -> case op of + Dollar -> extendAppRight <$> expr lhs <*> expr rhs + Pipe -> extendAppLeft <$> expr lhs <*> expr rhs Dot -> do lhs' <- expr lhs WithSrcs rhsSid _ rhs' <- return rhs @@ -461,13 +461,17 @@ expr (WithSrcs sid _ grp) = WithSrcE sid <$> case grp of _ -> throw rhsSid BadField return $ UFieldAccess lhs' (WithSrc rhsSid name) DoubleColon -> UTypeAnn <$> (expr lhs) <*> expr rhs - EvalBinOp s -> evalOp s + EvalBinOp (WithSrc opSid s) -> do + let f = WithSrcE opSid (fromSourceNameW (WithSrc opSid s)) + lhs' <- expr lhs + rhs' <- expr rhs + return $ explicitApp f [lhs', rhs'] DepAmpersand -> do lhs' <- tyOptPat lhs UDepPairTy . (UDepPairType ExplicitDepPair lhs') <$> expr rhs DepComma -> UDepPair <$> (expr lhs) <*> expr rhs - CSEqual -> throw opSid BadEqualSign - Colon -> throw opSid BadColon + CSEqual -> throw errSid BadEqualSign + Colon -> throw errSid BadColon ImplicitArrow -> case lhs of WithSrcs _ _ (CParens gs) -> do bs <- aPiBinders gs @@ -478,11 +482,9 @@ expr (WithSrcs sid _ grp) = WithSrcE sid <$> case grp of lhs' <- tyOptPat lhs UTabPi . (UTabPiExpr lhs') <$> expr rhs where - evalOp s = do - let f = WithSrcE opSid (fromSourceNameW (WithSrc opSid s)) - lhs' <- expr lhs - rhs' <- expr rhs - return $ explicitApp f [lhs', rhs'] + errSid = case sids of + [sid'] -> sid' + _ -> sid CPrefix (WithSrc prefixSid name) g -> do case name of "+" -> (withoutSrc <$> expr g) <&> \case diff --git a/src/lib/ConcreteSyntax.hs b/src/lib/ConcreteSyntax.hs index 6104c4df..5209466d 100644 --- a/src/lib/ConcreteSyntax.hs +++ b/src/lib/ConcreteSyntax.hs @@ -68,19 +68,22 @@ mustParseSourceBlock s = mustParseit s sourceBlock -- === helpers for target ADT === -interpOperator :: String -> Bin -interpOperator = \case - "&>" -> DepAmpersand - "." -> Dot - ",>" -> DepComma - ":" -> Colon - "|" -> Pipe - "::" -> DoubleColon - "$" -> Dollar - "->>" -> ImplicitArrow - "=>" -> FatArrow - "=" -> CSEqual - name -> EvalBinOp $ fromString $ "(" <> name <> ")" +interpOperator :: SrcId -> String -> ([SrcId], Bin) +interpOperator sid = \case + "&>" -> atomic DepAmpersand + "." -> atomic Dot + ",>" -> atomic DepComma + ":" -> atomic Colon + "|" -> atomic Pipe + "::" -> atomic DoubleColon + "$" -> atomic Dollar + "->>" -> atomic ImplicitArrow + "=>" -> atomic FatArrow + "=" -> atomic CSEqual + name -> ([], EvalBinOp $ WithSrc sid $ fromString $ "(" <> name <> ")") + where + atomic :: Bin -> ([SrcId], Bin) + atomic b = ([sid], b) pattern Identifier :: SourceName -> GroupW pattern Identifier name <- (WithSrcs _ _ (CLeaf (CIdentifier name))) @@ -551,9 +554,9 @@ leafGroup = leafGroup' >>= appendPostfixGroups appendFieldAccess :: GroupW -> Parser Group appendFieldAccess g = try do - sid <- dot + dot field <- cFieldName - return $ CBin (WithSrc sid Dot) g field + return $ CBin Dot g field cFieldName :: Parser GroupW cFieldName = cIdentifier <|> (toCLeaf CNat <$> natLit) @@ -622,15 +625,17 @@ ops = ] where other = ("other", anySymOp) backquote = ("backquote", Expr.InfixL backquoteOp) - juxtaposition = ("space", Expr.InfixL $ sc >> addSrcIdToBinOp (return $ CJuxtapose True)) + juxtaposition = ("space", Expr.InfixL $ sc >> addSrcIdToBinOp (return \x y -> ([], CJuxtapose True x y))) withClausePostfix = ("with", Expr.Postfix withClausePostfixOp) arrow = ("->", Expr.InfixR arrowOp) -addSrcIdToBinOp :: Parser (GroupW -> GroupW -> Group) -> Parser (GroupW -> GroupW -> GroupW) +addSrcIdToBinOp :: Parser (GroupW -> GroupW -> ([SrcId], Group)) -> Parser (GroupW -> GroupW -> GroupW) addSrcIdToBinOp op = do f <- op sid <- freshSrcId - return \x y -> WithSrcs sid [] $ f x y + return \x y -> do + let (atomicSids, g) = f x y + WithSrcs sid atomicSids g {-# INLINE addSrcIdToBinOp #-} addSrcIdToUnOp :: Parser (GroupW -> Group) -> Parser (GroupW -> GroupW) @@ -642,16 +647,13 @@ addSrcIdToUnOp op = do backquoteOp :: Parser (GroupW -> GroupW -> GroupW) backquoteOp = binApp do - WithSrc sid fname <- backquoteName - return $ WithSrc sid $ EvalBinOp fname + fname <- backquoteName + return ([], EvalBinOp fname) anySymOp :: Expr.Operator Parser GroupW anySymOp = Expr.InfixL $ binApp do WithSrc sid s <- label "infix operator" (mayBreak anySym) - return $ WithSrc sid $ interpOperator s - -infixSym :: String -> Parser SrcId -infixSym s = mayBreak $ symWithId $ T.pack s + return $ interpOperator sid s symOpN :: String -> (SourceName, Expr.Operator Parser GroupW) symOpN s = (fromString s, Expr.InfixN $ symOp s) @@ -664,14 +666,14 @@ symOpR s = (fromString s, Expr.InfixR $ symOp s) symOp :: String -> Parser (GroupW -> GroupW -> GroupW) symOp s = binApp do - sid <- label "infix operator" (infixSym s) - return $ WithSrc sid $ interpOperator s + sid <- label "infix operator" (mayBreak $ symWithId $ T.pack s) + return $ interpOperator sid s arrowOp :: Parser (GroupW -> GroupW -> GroupW) arrowOp = addSrcIdToBinOp do - sym "->" + sid <- symWithId "->" optEffs <- optional cEffs - return \lhs rhs -> CArrow lhs optEffs rhs + return \lhs rhs -> ([sid], CArrow lhs optEffs rhs) unOpPre :: String -> (SourceName, Expr.Operator Parser GroupW) unOpPre s = (fromString s, Expr.Prefix $ prefixOp s) @@ -681,8 +683,10 @@ prefixOp s = addSrcIdToUnOp do symId <- symWithId (fromString s) return $ CPrefix (WithSrc symId $ fromString s) -binApp :: Parser BinW -> Parser (GroupW -> GroupW -> GroupW) -binApp f = addSrcIdToBinOp $ CBin <$> f +binApp :: Parser ([SrcId], Bin) -> Parser (GroupW -> GroupW -> GroupW) +binApp f = addSrcIdToBinOp do + (sids, op) <- f + return \x y -> (sids, CBin op x y) withClausePostfixOp :: Parser (GroupW -> GroupW) withClausePostfixOp = addSrcIdToUnOp do diff --git a/src/lib/Lexing.hs b/src/lib/Lexing.hs index 71ff733d..3d7656e2 100644 --- a/src/lib/Lexing.hs +++ b/src/lib/Lexing.hs @@ -218,8 +218,10 @@ symChars :: HS.HashSet Char symChars = HS.fromList ".,!$^&*:-~+/=<>|?\\@#" -- XXX: unlike other lexemes, this doesn't consume trailing whitespace -dot :: Parser SrcId -dot = srcPos <$> lexeme' (return ()) Symbol (void $ char '.') +dot :: Parser () +dot = do + WithSrc sid () <- lexeme' (return ()) Symbol (void $ char '.') + emitAtomicLexeme sid -- === Util === @@ -372,9 +374,13 @@ lexeme' sc' lexemeType p = do atomicLexeme :: LexemeType -> Parser () -> Parser () atomicLexeme lexemeType p = do WithSrc sid () <- lexeme lexemeType p - modify \ctx -> ctx { curAtomicLexemes = curAtomicLexemes ctx ++ [sid] } + emitAtomicLexeme sid {-# INLINE atomicLexeme #-} +emitAtomicLexeme :: LexemeId -> Parser () +emitAtomicLexeme sid = modify \ctx -> + ctx { curAtomicLexemes = curAtomicLexemes ctx ++ [sid] } + collectAtomicLexemeIds :: Parser a -> Parser ([SrcId], a) collectAtomicLexemeIds p = do prevAtomicLexemes <- gets curAtomicLexemes diff --git a/src/lib/SourceIdTraversal.hs b/src/lib/SourceIdTraversal.hs index 8333bc78..9a1a9e09 100644 --- a/src/lib/SourceIdTraversal.hs +++ b/src/lib/SourceIdTraversal.hs @@ -76,6 +76,11 @@ instance IsTree Group where CArrow l effs r -> visit l >> visit effs >> visit r CWith b body -> visit b >> visit body +instance IsTree Bin where + visit = \case + EvalBinOp b -> visit b + _ -> return () + instance IsTree CSBlock where visit = \case IndentedBlock sid decls -> enterNode sid $ visit decls @@ -126,4 +131,3 @@ instance (IsTree a, IsTree b, IsTree c) => IsTree (a, b, c) where instance IsTree AppExplicitness where visit _ = return () instance IsTree SourceName where visit _ = return () instance IsTree LetAnn where visit _ = return () -instance IsTree Bin where visit _ = return () diff --git a/src/lib/Types/Source.hs b/src/lib/Types/Source.hs index 2334cc4b..66e7ffd1 100644 --- a/src/lib/Types/Source.hs +++ b/src/lib/Types/Source.hs @@ -162,7 +162,6 @@ type GroupW = WithSrcs Group type CTopDeclW = WithSrcs CTopDecl type CSDeclW = WithSrcs CSDecl type SourceNameW = WithSrc SourceName -type BinW = WithSrc Bin type BracketedGroup = WithSrcs [GroupW] -- optional arrow, effects, result type @@ -224,7 +223,7 @@ data Group | CPrim PrimName [GroupW] | CParens [GroupW] | CBrackets [GroupW] - | CBin BinW GroupW GroupW + | CBin Bin GroupW GroupW | CJuxtapose Bool GroupW GroupW -- Bool means "there's a space between the groups" | CPrefix SourceNameW GroupW -- covers unary - and unary + among others | CGivens GivenClause @@ -250,7 +249,7 @@ data CLeaf type CaseAlt = (GroupW, CSBlock) -- scrutinee, lexeme Id, body data Bin - = EvalBinOp SourceName + = EvalBinOp SourceNameW | DepAmpersand | Dot | DepComma -- cgit v1.2.3-70-g09d2