From 80746c5142117c03affacee1e68dfc12dc514505 Mon Sep 17 00:00:00 2001 From: William Rusnack Date: Sat, 27 Apr 2024 11:08:36 -0400 Subject: [PATCH] hPutBuilder --- src/Data/Text/Internal/Builder.hs | 50 +++++++++++++++++++------------ src/Data/Text/Lazy/Builder/IO.hs | 46 ++++++++++++++++++++++++++++ text.cabal | 1 + 3 files changed, 78 insertions(+), 19 deletions(-) create mode 100644 src/Data/Text/Lazy/Builder/IO.hs diff --git a/src/Data/Text/Internal/Builder.hs b/src/Data/Text/Internal/Builder.hs index 98c778767..3c210ddc6 100644 --- a/src/Data/Text/Internal/Builder.hs +++ b/src/Data/Text/Internal/Builder.hs @@ -39,8 +39,10 @@ module Data.Text.Internal.Builder ( -- * Public API -- ** The Builder type Builder + , getTexts , toLazyText , toLazyTextWith + , starter -- ** Constructing Builders , singleton @@ -91,11 +93,19 @@ import GHC.Stack (HasCallStack) newtype Builder = Builder { -- Invariant (from Data.Text.Lazy): -- The lists include no null Texts. - runBuilder :: forall s. (Buffer s -> ST s [S.Text]) + runBuilder :: forall s + . (Int -> (Int -> ST s (A.MArray s)) -> Buffer s -> ST s [S.Text]) + -> Int -- buffer size + -> (Int -> ST s (A.MArray s)) -- new array -> Buffer s -> ST s [S.Text] } +getTexts :: Int -> (Int -> ST s (A.MArray s)) -> Builder -> ST s [S.Text] +getTexts chunkSize new b = + newBuffer new chunkSize >>= runBuilder (b `append` flush) starter chunkSize new + + instance Semigroup Builder where (<>) = append {-# INLINE (<>) #-} @@ -133,7 +143,7 @@ instance Ord Builder where -- * @'toLazyText' 'empty' = 'L.empty'@ -- empty :: Builder -empty = Builder (\ k buf -> k buf) +empty = Builder (\ k new buf -> k new buf) {-# INLINE empty #-} -- | /O(1)./ A @Builder@ taking a single character, satisfying @@ -157,6 +167,7 @@ singleton c = writeAtMost 4 $ \ marr o -> unsafeWrite marr o (safe c) -- append :: Builder -> Builder -> Builder append (Builder f) (Builder g) = Builder (f . g) +--append (Builder f) (Builder g) = Builder $ \ k new buf -> f (g k) new buf {-# INLINE [0] append #-} -- TODO: Experiment to find the right threshold. @@ -193,22 +204,20 @@ fromText t@(Text arr off l) -- -- @since 1.2.0.0 fromString :: String -> Builder -fromString str = Builder $ \k (Buffer p0 o0 u0 l0) -> - let loop !marr !o !u !l [] = k (Buffer marr o u l) +fromString str = Builder $ \k chunkSize new (Buffer p0 o0 u0 l0) -> + let loop !marr !o !u !l [] = k chunkSize new (Buffer marr o u l) loop marr o u l s@(c:cs) | l <= 3 = do A.shrinkM marr (o + u) arr <- A.unsafeFreeze marr let !t = Text arr o u - marr' <- A.new chunkSize + marr' <- new chunkSize ts <- inlineInterleaveST (loop marr' 0 0 chunkSize s) return $ t : ts | otherwise = do n <- unsafeWrite marr (o+u) (safe c) loop marr o (u+n) (l-n) cs in loop p0 o0 u0 l0 str - where - chunkSize = smallChunkSize {-# INLINEABLE fromString #-} -- | /O(1)./ A @Builder@ taking a lazy @Text@, satisfying @@ -243,18 +252,21 @@ toLazyText = toLazyTextWith smallChunkSize -- buffers will be the default buffer size. toLazyTextWith :: Int -> Builder -> L.Text toLazyTextWith chunkSize m = L.fromChunks (runST $ - newBuffer chunkSize >>= runBuilder (m `append` flush) (const (return []))) + newBuffer A.new chunkSize >>= runBuilder (m `append` flush) starter smallChunkSize A.new) + +starter :: Monad m => a -> b -> c -> m [d] +starter _ _ _ = return [] -- | /O(1)./ Pop the strict @Text@ we have constructed so far, if any, -- yielding a new chunk in the result lazy @Text@. flush :: Builder -flush = Builder $ \ k buf@(Buffer p o u l) -> +flush = Builder $ \ k cs new buf@(Buffer p o u l) -> if u == 0 - then k buf + then k cs new buf else do arr <- A.unsafeFreeze p let !b = Buffer p (o+u) 0 l !t = Text arr o u - ts <- inlineInterleaveST (k b) + ts <- inlineInterleaveST (k cs new b) return $! t : ts {-# INLINE [1] flush #-} -- defer inlining so that flush/flush rule may fire. @@ -263,18 +275,18 @@ flush = Builder $ \ k buf@(Buffer p o u l) -> -- | Sequence an ST operation on the buffer withBuffer :: (forall s. Buffer s -> ST s (Buffer s)) -> Builder -withBuffer f = Builder $ \k buf -> f buf >>= k +withBuffer f = Builder $ \k cs new buf -> f buf >>= k cs new {-# INLINE withBuffer #-} -- | Get the size of the buffer withSize :: (Int -> Builder) -> Builder -withSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> - runBuilder (f l) k buf +withSize f = Builder $ \ k cs new buf@(Buffer _ _ _ l) -> + runBuilder (f l) k cs new buf {-# INLINE withSize #-} -- | Map the resulting list of texts. mapBuilder :: ([S.Text] -> [S.Text]) -> Builder -mapBuilder f = Builder (fmap f .) +mapBuilder f = Builder $ \ k cs new b -> f <$> k cs new b ------------------------------------------------------------------------ @@ -283,7 +295,7 @@ ensureFree :: Int -> Builder ensureFree !n = withSize $ \ l -> if n <= l then empty - else flush `append'` withBuffer (const (newBuffer (max n smallChunkSize))) + else flush `append'` Builder (\k chunkSize new _ -> k chunkSize new =<< newBuffer new (max n chunkSize)) {-# INLINE [0] ensureFree #-} writeAtMost :: Int -> (forall s. A.MArray s -> Int -> ST s Int) -> Builder @@ -302,9 +314,9 @@ writeBuffer f (Buffer p o u l) = do return $! Buffer p o (u+n) (l-n) {-# INLINE writeBuffer #-} -newBuffer :: Int -> ST s (Buffer s) -newBuffer size = do - arr <- A.new size +newBuffer :: (Int -> ST s (A.MArray s)) -> Int -> ST s (Buffer s) +newBuffer new size = do + arr <- new size return $! Buffer arr 0 0 size {-# INLINE newBuffer #-} diff --git a/src/Data/Text/Lazy/Builder/IO.hs b/src/Data/Text/Lazy/Builder/IO.hs new file mode 100644 index 000000000..b56e1d9b5 --- /dev/null +++ b/src/Data/Text/Lazy/Builder/IO.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RecordWildCards #-} +module Data.Text.Lazy.Builder.IO + ( hPutBuilder + , hPutBuilderUtf8 + ) where + +import System.IO (hPutBuf) +import Data.Text.Internal (Text(Text)) +import GHC.IO.Handle.Types (Handle, Handle__(..), BufferMode(BlockBuffering)) +import Data.Text.Internal.Builder (Builder, getTexts) +import Data.Text.Array (newPinned) +import Data.Array.Byte (ByteArray(ByteArray)) +import GHC.IO.Handle.Internals (flushWriteBuffer, wantWritableHandle, flushWriteBuffer) +import Data.Foldable (for_) +import GHC.Exts (byteArrayContents#) +import Control.Monad.ST (runST) +import GHC.Ptr (Ptr(Ptr)) +import GHC.IO.Encoding (textEncodingName) + +hPutBuilder :: Handle -> Builder -> IO () +hPutBuilder h b = do + (mode, nl, isUtf8) <- wantWritableHandle "hPutStr" h $ \(Handle__ {..}) -> do + let isUtf8 = maybe False (("UTF-8" ==) . textEncodingName) haCodec + return (haBufferMode, haOutputNL, isUtf8) + case mode of + --NoBuffering -> hPutChars h b + --LineBuffering -> writeLines h nl buf b + BlockBuffering _ + -- | nl == CRLF -> writeBlocksCRLF h buf b + | isUtf8 -> hPutBuilderUtf8 h b + -- | otherwise -> writeBlocksRaw h buf b + + +hPutBuilderUtf8 :: Handle -> Builder -> IO () +hPutBuilderUtf8 h b = do + flushBytes + -- ????? Does the text ByteArray have a chance of being garbage collected before the flush finishes? + for_ textBuffers $ \(Text (ByteArray a#) _ bufR) -> hPutBuf h (Ptr (byteArrayContents# a#)) bufR + where + flushBytes = wantWritableHandle "hPutBuilder" h flushWriteBuffer + textBuffers = runST $ getTexts bufferSize newPinned b + +bufferSize :: Int +bufferSize = 1024 -- I don't know what this should be diff --git a/text.cabal b/text.cabal index 9b10c97c2..b41ae9fc5 100644 --- a/text.cabal +++ b/text.cabal @@ -195,6 +195,7 @@ library Data.Text.Lazy Data.Text.Lazy.Builder Data.Text.Lazy.Builder.Int + Data.Text.Lazy.Builder.IO Data.Text.Lazy.Builder.RealFloat Data.Text.Lazy.Encoding Data.Text.Lazy.IO