Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement some CString operations using MutByteArray #2966

Merged
merged 5 commits into from
Feb 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions core/src/Streamly/Internal/Data/CString.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{-# LANGUAGE UnliftedFFITypes #-}

-- |
-- Module : Streamly.Internal.Data.CString
-- Copyright : (c) 2023 Composewell Technologies
-- License : BSD3-3-Clause
-- Maintainer : [email protected]
-- Portability : GHC
--
-- MutByteArray representing null terminated c strings.
-- All APIs in this module are unsafe and caution must be used when using them.
-- Completely experimental. Everything is subject to change without notice.

module Streamly.Internal.Data.CString
(
splice
, spliceCString
, splicePtrN
, putCString
, length
)

where

#ifdef DEBUG
#include "assert.hs"
#endif

import GHC.Ptr (Ptr(..), castPtr)
import Foreign.C (CString, CSize(..))
import GHC.Exts (MutableByteArray#, RealWorld)
import GHC.Word (Word8)

import Streamly.Internal.Data.MutByteArray.Type hiding (length)

import Prelude hiding (length)

-- XXX Use cstringLength# from GHC.CString in ghc-prim
foreign import ccall unsafe "string.h strlen" c_strlen
:: MutableByteArray# RealWorld -> IO CSize

-- XXX Use cstringLength# from GHC.CString in ghc-prim
foreign import ccall unsafe "string.h strlen" c_strlen_pinned
:: CString -> IO CSize

{-# INLINE length #-}
length :: MutByteArray -> IO Int
length (MutByteArray src#) = do
fmap fromIntegral $ c_strlen src#

-- | Join two null terminated cstrings, the null byte of the first string is
-- overwritten. Does not check the destination length or source length.
-- Destination must have enough space to accomodate src.
--
-- Returns the offset of the null byte.
--
-- /Unsafe/
splice :: MutByteArray -> MutByteArray -> IO Int
splice dst@(MutByteArray dst#) src@(MutByteArray src#) = do
srcLen <- fmap fromIntegral $ c_strlen src#
#ifdef DEBUG
srcLen1 <- length src
assertM(srcLen <= srcLen1)
#endif
dstLen <- fmap fromIntegral $ c_strlen dst#
#ifdef DEBUG
dstLen1 <- length dst
assertM(dstLen <= dstLen1)
assertM(dstLen + srcLen + 1 <= dstLen1)
#endif
unsafePutSlice src 0 dst dstLen (srcLen + 1)
return $ dstLen + srcLen

-- | Append specified number of bytes from a Ptr to a MutByteArray CString. The
-- null byte of CString is overwritten and the result is terminated with a null
-- byte.
{-# INLINE splicePtrN #-}
splicePtrN :: MutByteArray -> Ptr Word8 -> Int -> IO Int
splicePtrN dst@(MutByteArray dst#) src srcLen = do
dstLen <- fmap fromIntegral $ c_strlen dst#
#ifdef DEBUG
dstLen1 <- length dst
assertM(dstLen <= dstLen1)
assertM(dstLen + srcLen + 1 <= dstLen1)
#endif
-- unsafePutSlice src 0 dst dstLen srcLen
-- XXX unsafePutPtrN signature consistency with serialization routines
-- XXX unsafePutSlice as well
unsafePutPtrN src dst dstLen (srcLen + 1)
return $ dstLen + srcLen

-- | Join a null terminated cstring MutByteByteArray with a null terminated
-- cstring Ptr.
{-# INLINE spliceCString #-}
spliceCString :: MutByteArray -> CString -> IO Int
spliceCString dst src = do
srcLen <- fmap fromIntegral $ c_strlen_pinned src
splicePtrN dst (castPtr src) srcLen

-- XXX this is CString serialization.

-- | @putCString dst dstOffset cstr@ writes the cstring cstr at dstOffset in
-- the dst MutByteArray. The result is terminated by a null byte.
{-# INLINE putCString #-}
putCString :: MutByteArray -> Int -> CString -> IO Int
putCString dst off src = do
srcLen <- fmap fromIntegral $ c_strlen_pinned src
unsafePutPtrN (castPtr src) dst off (srcLen + 1)
return $ off + srcLen
27 changes: 22 additions & 5 deletions core/src/Streamly/Internal/Data/MutArray/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ module Streamly.Internal.Data.MutArray.Type

-- ** Random writes
, putIndex
-- , putIndexRev -- or revPutIndex
, unsafePutIndex
, putIndices
-- , putFromThenTo
Expand Down Expand Up @@ -258,6 +259,7 @@ module Streamly.Internal.Data.MutArray.Type
-- convenience operations implemented in terms of folds.
, unsafeAppendPtrN
, appendPtrN
, appendCString
, appendCString#
-- , appendStreamGrowWith
, appendStream
Expand Down Expand Up @@ -523,6 +525,7 @@ foreign import ccall unsafe "string.h memcpy" c_memcpy_pinned_src
foreign import ccall unsafe "memchr_index" c_memchr_index
:: MutableByteArray# RealWorld -> CSize -> Word8 -> CSize -> IO CSize

-- XXX Use cstringLength# from GHC.CString in ghc-prim
foreign import ccall unsafe "string.h strlen" c_strlen_pinned
:: Addr# -> IO CSize

Expand Down Expand Up @@ -1572,8 +1575,16 @@ unsafeSliceOffLen index len (MutArray contents start e _) =
-- user cannot overwrite elements beyond the end of the slice.
(MutArray contents fp1 end end)

-- | /O(1)/ Slice an array in constant time. Throws an error if the slice
-- extends out of the array bounds.
-- | /O(1)/ Get a reference to a slice from a mutable array. Throws an error if
-- the slice extends out of the array bounds.
--
-- The capacity of the slice is the same as its length i.e. it does not have
-- any unused or reserved space at the end.
--
-- The slice shares the same underlying mutable array when created. However, if
-- the slice or the original array is reallocated by growing or shrinking then
-- it will be copied to new memory and they will no longer share the same
-- memory.
--
-- /Pre-release/
{-# INLINE sliceOffLen #-}
Expand Down Expand Up @@ -2476,13 +2487,19 @@ appendStreamN :: (MonadIO m, Unbox a) =>
Int -> MutArray a -> Stream m a -> m (MutArray a)
appendStreamN n arr = D.fold (appendMax n arr)

-- | The array grown only by required amount of space.
-- | The array is grown only by the required amount of space.
{-# INLINE appendCString# #-}
appendCString# :: MonadIO m => Addr# -> MutArray Word8 -> m (MutArray Word8)
appendCString# addr arr = do
appendCString# :: MonadIO m => MutArray Word8 -> Addr# -> m (MutArray Word8)
appendCString# arr addr = do
len <- liftIO $ c_strlen_pinned addr
appendPtrN arr (Ptr addr) (fromIntegral len)

-- Note: in hsc code # is treated in a special way, so it is difficult to use
-- appendCString#
{-# INLINE appendCString #-}
appendCString :: MonadIO m => MutArray Word8 -> Ptr a -> m (MutArray Word8)
appendCString arr (Ptr addr) = appendCString# arr addr

-------------------------------------------------------------------------------
-- Folds for creating
-------------------------------------------------------------------------------
Expand Down
10 changes: 8 additions & 2 deletions core/src/Streamly/Internal/Data/MutByteArray/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ module Streamly.Internal.Data.MutByteArray.Type
, getMutByteArray#

-- ** Helpers
, unsafeByteCmp
, touch

-- ** Pinning
Expand All @@ -34,12 +33,19 @@ module Streamly.Internal.Data.MutByteArray.Type

-- ** Access
, length
, unsafeAsPtr

-- ** Modify
, unsafePutSlice
, unsafePutPtrN

-- ** Copy
, unsafeCloneSliceAs
, unsafeCloneSlice
, unsafePinnedCloneSlice -- XXX unsafeCloneSlice'
, unsafeAsPtr

-- ** Compare
, unsafeByteCmp

-- ** Capacity Management
, blockSize
Expand Down
1 change: 1 addition & 0 deletions core/src/Streamly/Internal/FileSystem/Path/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1817,6 +1817,7 @@ mkQ f =
-- Operations of Path
------------------------------------------------------------------------------

-- See also cstringLength# in GHC.CString in ghc-prim
foreign import ccall unsafe "string.h strlen" c_strlen_pinned
:: Addr# -> IO CSize

Expand Down
17 changes: 13 additions & 4 deletions core/src/Streamly/Internal/FileSystem/Posix/ReadDir.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ module Streamly.Internal.FileSystem.Posix.ReadDir
#if !defined(mingw32_HOST_OS) && !defined(__MINGW32__)
DirStream (..)
, openDirStream
, openDirStreamCString
, closeDirStream
, readDirStreamEither
, readEitherChunks
Expand All @@ -29,7 +30,7 @@ import Data.Char (ord)
import Foreign (Ptr, Word8, nullPtr, peek, peekByteOff, castPtr, plusPtr)
import Foreign.C
(resetErrno, Errno(..), getErrno, eINTR, throwErrno
, throwErrnoIfMinus1Retry_, CInt(..), CString, CChar, CSize(..))
, throwErrnoIfMinus1Retry_, throwErrnoIfNullRetry, CInt(..), CString, CChar, CSize(..))
import Foreign.Storable (poke)
import Fusion.Plugin.Types (Fuse(..))
import Streamly.Internal.Data.Array (Array(..))
Expand Down Expand Up @@ -80,15 +81,22 @@ foreign import ccall unsafe "dirent.h readdir"
foreign import ccall unsafe "lstat_is_directory"
c_lstat_is_directory :: CString -> IO CInt

-- XXX Use openat instead of open so that we do not have to build and resolve
-- absolute paths.
--
-- | The CString must be pinned.
{-# INLINE openDirStreamCString #-}
openDirStreamCString :: CString -> IO DirStream
openDirStreamCString s = do
-- XXX we do not decode the path here, just print it as cstring
-- XXX pass lazy concat of "openDirStream: " ++ s
dirp <- throwErrnoIfNullRetry "openDirStream" $ c_opendir s
return (DirStream dirp)

-- XXX Path is not null terminated therefore we need to make a copy even if the
-- array is pinned.
-- {-# INLINE openDirStream #-}
openDirStream :: PosixPath -> IO DirStream
openDirStream p =
Array.asCStringUnsafe (Path.toChunk p) $ \s -> do
-- openDirStreamCString s
dirp <- throwErrnoPathIfNullRetry "openDirStream" p $ c_opendir s
return (DirStream dirp)

Expand Down Expand Up @@ -362,6 +370,7 @@ readEitherChunks alldirs =
foreign import ccall unsafe "string.h memcpy" c_memcpy
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)

-- See also cstringLength# in GHC.CString in ghc-prim
foreign import ccall unsafe "string.h strlen" c_strlen
:: Ptr CChar -> IO CSize

Expand Down
1 change: 1 addition & 0 deletions core/streamly-core.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ library

-- streamly-core-array-types
, Streamly.Internal.Data.MutByteArray
, Streamly.Internal.Data.CString

-- streaming and parsing Haskell types to/from bytes
, Streamly.Internal.Data.Binary.Parser
Expand Down
Loading