Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor transcript parser #5235

Merged
merged 2 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions unison-cli/src/Unison/Codebase/Transcript.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{-# LANGUAGE PatternSynonyms #-}

-- | The data model for Unison transcripts.
module Unison.Codebase.Transcript
( ExpectingError,
ScratchFileName,
Hidden (..),
UcmLine (..),
UcmContext (..),
APIRequest (..),
pattern CMarkCodeBlock,
Stanza,
ProcessedBlock (..),
)
where

import CMark qualified
import Unison.Core.Project (ProjectBranchName, ProjectName)
import Unison.Prelude
import Unison.Project (ProjectAndBranch)

type ExpectingError = Bool

type ScratchFileName = Text

data Hidden = Shown | HideOutput | HideAll
deriving (Eq, Show)

data UcmLine
= UcmCommand UcmContext Text
| -- | Text does not include the '--' prefix.
UcmComment Text

-- | Where a command is run: a project branch (myproject/mybranch>).
data UcmContext
= UcmContextProject (ProjectAndBranch ProjectName ProjectBranchName)

data APIRequest
= GetRequest Text
| APIComment Text

pattern CMarkCodeBlock :: (Maybe CMark.PosInfo) -> Text -> Text -> CMark.Node
pattern CMarkCodeBlock pos info body = CMark.Node pos (CMark.CODE_BLOCK info body) []

type Stanza = Either CMark.Node ProcessedBlock

data ProcessedBlock
= Ucm Hidden ExpectingError [UcmLine]
| Unison Hidden ExpectingError (Maybe ScratchFileName) Text
| API [APIRequest]
166 changes: 166 additions & 0 deletions unison-cli/src/Unison/Codebase/Transcript/Parser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
-- | Parse and print CommonMark (like Github-flavored Markdown) transcripts.
module Unison.Codebase.Transcript.Parser
( -- * printing
formatAPIRequest,
formatUcmLine,
formatStanza,
formatNode,
formatProcessedBlock,

-- * conversion
processedBlockToNode,

-- * parsing
stanzas,
ucmLine,
apiRequest,
fenced,
hidden,
expectingError,
language,
)
where

import CMark qualified
import Data.Char qualified as Char
import Data.Text qualified as Text
import Data.These (These (..))
import Text.Megaparsec qualified as P
import Unison.Codebase.Transcript
import Unison.Prelude
import Unison.Project (ProjectAndBranch (ProjectAndBranch))

formatAPIRequest :: APIRequest -> Text
formatAPIRequest = \case
GetRequest txt -> "GET " <> txt
APIComment txt -> "-- " <> txt

formatUcmLine :: UcmLine -> Text
formatUcmLine = \case
UcmCommand context txt -> formatContext context <> "> " <> txt
UcmComment txt -> "--" <> txt
where
formatContext (UcmContextProject projectAndBranch) = into @Text projectAndBranch

formatStanza :: Stanza -> Text
formatStanza = either formatNode formatProcessedBlock

formatNode :: CMark.Node -> Text
formatNode = (<> "\n") . CMark.nodeToCommonmark [] Nothing

formatProcessedBlock :: ProcessedBlock -> Text
formatProcessedBlock = formatNode . processedBlockToNode

processedBlockToNode :: ProcessedBlock -> CMark.Node
processedBlockToNode = \case
Ucm _ _ cmds -> CMarkCodeBlock Nothing "ucm" $ foldr ((<>) . formatUcmLine) "" cmds
Unison _hide _ fname txt ->
CMarkCodeBlock Nothing "unison" $ maybe txt (\fname -> Text.unlines ["---", "title: " <> fname, "---", txt]) fname
API apiRequests -> CMarkCodeBlock Nothing "api" $ Text.unlines $ formatAPIRequest <$> apiRequests

type P = P.Parsec Void Text

stanzas :: FilePath -> Text -> Either (P.ParseErrorBundle Text Void) [Stanza]
stanzas srcName = (\(CMark.Node _ _DOCUMENT blocks) -> traverse stanzaFromNode blocks) . CMark.commonmarkToNode []
where
stanzaFromNode :: CMark.Node -> Either (P.ParseErrorBundle Text Void) Stanza
stanzaFromNode node = case node of
CMarkCodeBlock _ info body -> maybe (Left node) pure <$> P.parse (fenced info) srcName body
_ -> pure $ Left node

ucmLine :: P UcmLine
ucmLine = ucmCommand <|> ucmComment
where
ucmCommand :: P UcmLine
ucmCommand = do
context <-
P.try do
contextString <- P.takeWhile1P Nothing (/= '>')
context <-
case (tryFrom @Text contextString) of
(Right (These project branch)) -> pure (UcmContextProject (ProjectAndBranch project branch))
_ -> fail "expected project/branch or absolute path"
void $ lineToken $ word ">"
pure context
line <- P.takeWhileP Nothing (/= '\n') <* spaces
pure $ UcmCommand context line

ucmComment :: P UcmLine
ucmComment = do
word "--"
line <- P.takeWhileP Nothing (/= '\n') <* spaces
pure $ UcmComment line

apiRequest :: P APIRequest
apiRequest = do
apiComment <|> getRequest
where
getRequest = do
word "GET"
spaces
path <- P.takeWhile1P Nothing (/= '\n')
spaces
pure (GetRequest path)
apiComment = do
word "--"
comment <- P.takeWhileP Nothing (/= '\n')
spaces
pure (APIComment comment)

-- | Produce the correct parser for the code block based on the provided info string.
fenced :: Text -> P (Maybe ProcessedBlock)
fenced info = do
body <- P.getInput
P.setInput info
fenceType <- lineToken (word "ucm" <|> word "unison" <|> word "api" <|> language)
case fenceType of
"ucm" -> do
hide <- hidden
err <- expectingError
P.setInput body
pure . Ucm hide err <$> (spaces *> many ucmLine)
"unison" ->
do
-- todo: this has to be more interesting
-- ```unison:hide
-- ```unison
-- ```unison:hide:all scratch.u
hide <- lineToken hidden
err <- lineToken expectingError
fileName <- optional untilSpace1
P.setInput body
pure . Unison hide err fileName <$> (spaces *> P.getInput)
"api" -> do
P.setInput body
pure . API <$> (spaces *> many apiRequest)
_ -> pure Nothing

word :: Text -> P Text
word txt = P.try $ do
chs <- P.takeP (Just $ show txt) (Text.length txt)
guard (chs == txt)
pure txt

lineToken :: P a -> P a
lineToken p = p <* nonNewlineSpaces

nonNewlineSpaces :: P ()
nonNewlineSpaces = void $ P.takeWhileP Nothing (\ch -> ch == ' ' || ch == '\t')

hidden :: P Hidden
hidden =
(HideAll <$ word ":hide:all")
<|> (HideOutput <$ word ":hide")
<|> pure Shown

expectingError :: P ExpectingError
expectingError = isJust <$> optional (word ":error")

untilSpace1 :: P Text
untilSpace1 = P.takeWhile1P Nothing (not . Char.isSpace)

language :: P Text
language = P.takeWhileP Nothing (\ch -> Char.isDigit ch || Char.isLower ch || ch == '_')

spaces :: P ()
spaces = void $ P.takeWhileP (Just "spaces") Char.isSpace
Loading