diff --git a/.gitignore b/.gitignore index 02d18a82..4c1b6dda 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,24 @@ TAGS cabal-dev dist +dist-newstyle *sandbox* #*# *.*~ specs samples .stack-work -*.lock \ No newline at end of file + +# nix +result +result-doc +*.lock +*.o +*.hi +*.prof +*.aux +*.hp +*.ps +.envrc +.direnv +cabal.project.local diff --git a/benchmarks/Main.hs b/benchmarks/Main.hs index 8d483519..72c9ceef 100644 --- a/benchmarks/Main.hs +++ b/benchmarks/Main.hs @@ -2,9 +2,17 @@ module Main (main) where import Codec.Xlsx +import Codec.Xlsx.Parser.Stream +import Codec.Xlsx.Writer.Stream +import Control.DeepSeq +import Control.Lens +import Control.Monad (void) import Criterion.Main import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LB +import qualified Data.Conduit as C +import qualified Data.Conduit.Combinators as C +import Data.Maybe main :: IO () main = do @@ -12,10 +20,29 @@ main = do -- "data/6000.rows.x.26.cols.xlsx" bs <- BS.readFile filename let bs' = LB.fromStrict bs + parsed :: Xlsx + parsed = toXlsxFast bs' + idx <- fmap (fromMaybe (error "ix not found")) $ runXlsxM filename $ makeIndexFromName "Sample list" + items <- runXlsxM filename $ collectItems idx + deepseq (parsed, bs', idx, items) (pure ()) defaultMain [ bgroup "readFile" [ bench "with xlsx" $ nf toXlsx bs' , bench "with xlsx fast" $ nf toXlsxFast bs' + , bench "with stream (counting)" $ nfIO $ runXlsxM filename $ countRowsInSheet idx + , bench "with stream (reading)" $ nfIO $ runXlsxM filename $ readSheet idx (pure . rwhnf) + ] + , bgroup + "writeFile" + [ bench "with xlsx" $ nf (fromXlsx 0) parsed + , bench "with stream (no sst)" $ + nfIO $ C.runConduit $ + void (writeXlsxWithSharedStrings defaultSettings mempty $ C.yieldMany $ view si_row <$> items) + C..| C.fold + , bench "with stream (sst)" $ + nfIO $ C.runConduit $ + void (writeXlsx defaultSettings $ C.yieldMany $ view si_row <$> items) + C..| C.fold ] ] diff --git a/data/inline-strings.xlsx b/data/inline-strings.xlsx new file mode 100644 index 00000000..fbf94e16 Binary files /dev/null and b/data/inline-strings.xlsx differ diff --git a/default.nix b/default.nix new file mode 100644 index 00000000..fb959d3b --- /dev/null +++ b/default.nix @@ -0,0 +1,27 @@ +let + rev = "07ca3a021f05d6ff46bbd03c418b418abb781279"; # first 21.05 release + url = "https://github.com/NixOS/nixpkgs/archive/${rev}.tar.gz"; + compiler = "ghc884"; + isLibraryProfiling = false; + pkgs = import (builtins.fetchTarball url) { + config = if isLibraryProfiling then ({ + packageOverrides = pkgs_super: { + haskell = pkgs_super.haskell // { + packages = pkgs_super.haskell.packages // { + "${compiler}" = pkgs_super.haskell.packages."${compiler}".override { + overrides = self: super: { + mkDerivation = args: super.mkDerivation (args // { + enableLibraryProfiling = true; + }); + }; + }; + }; + }; + }; + }) else {}; + }; + + hpkgs = pkgs.haskell.packages."${compiler}"; +in pkgs.haskell.lib.overrideCabal (hpkgs.callCabal2nix "xlsx" ./. {}) { + libraryToolDepends = [pkgs.cabal-install]; +} diff --git a/shell.nix b/shell.nix new file mode 100644 index 00000000..cf32fb68 --- /dev/null +++ b/shell.nix @@ -0,0 +1,2 @@ +(import ./default.nix).env # not flake-based +# (import ./.).devShell."${builtins.currentSystem}" # flake-based diff --git a/src/Codec/Xlsx/Parser.hs b/src/Codec/Xlsx/Parser.hs index a210c5b7..a9d0b35f 100644 --- a/src/Codec/Xlsx/Parser.hs +++ b/src/Codec/Xlsx/Parser.hs @@ -2,6 +2,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PackageImports #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} @@ -16,7 +17,7 @@ module Codec.Xlsx.Parser , Parser ) where -import qualified Codec.Archive.Zip as Zip +import qualified "zip-archive" Codec.Archive.Zip as Zip import Control.Applicative import Control.Arrow (left) import Control.Error.Safe (headErr) @@ -27,7 +28,7 @@ import Lens.Micro #else import Control.Lens hiding ((<.>), element, views) #endif -import Control.Monad (forM, join, void) +import Control.Monad (join, void) import Control.Monad.Except (catchError, throwError) import Data.Bool (bool) import Data.ByteString (ByteString) @@ -54,7 +55,6 @@ import Codec.Xlsx.Parser.Internal import Codec.Xlsx.Parser.Internal.PivotTable import Codec.Xlsx.Types import Codec.Xlsx.Types.Cell (formulaDataFromCursor) -import Codec.Xlsx.Types.Common (xlsxTextToCellValue) import Codec.Xlsx.Types.Internal import Codec.Xlsx.Types.Internal.CfPair import Codec.Xlsx.Types.Internal.CommentTable as CommentTable @@ -71,7 +71,7 @@ import Codec.Xlsx.Types.PivotTable.Internal toXlsx :: L.ByteString -> Xlsx toXlsx = either (error . show) id . toXlsxEither -data ParseError = InvalidZipArchive +data ParseError = InvalidZipArchive String | MissingFile FilePath | InvalidFile FilePath Text | InvalidRef FilePath RefId @@ -106,7 +106,7 @@ toXlsxEitherBase :: -> L.ByteString -> Parser Xlsx toXlsxEitherBase parseSheet bs = do - ar <- left (const InvalidZipArchive) $ Zip.toArchiveOrFail bs + ar <- left InvalidZipArchive $ Zip.toArchiveOrFail bs sst <- getSharedStrings ar contentTypes <- getContentTypes ar (wfs, names, cacheSources, dateBase) <- readWorkbook ar diff --git a/src/Codec/Xlsx/Parser/Internal/Memoize.hs b/src/Codec/Xlsx/Parser/Internal/Memoize.hs new file mode 100644 index 00000000..dad1da8a --- /dev/null +++ b/src/Codec/Xlsx/Parser/Internal/Memoize.hs @@ -0,0 +1,53 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeApplications #-} + +-- | I rewrote: https://hackage.haskell.org/package/unliftio-0.2.20/docs/src/UnliftIO.Memoize.html#Memoized +-- for monad trans basecontrol +-- we don't need a generic `m` anyway. it's good enough in base IO. +module Codec.Xlsx.Parser.Internal.Memoize + ( Memoized + , runMemoized + , memoizeRef + ) where + +import Control.Applicative as A +import Control.Monad (join) +import Control.Monad.IO.Class +import Data.IORef +import Control.Exception + +-- | A \"run once\" value, with results saved. Extract the value with +-- 'runMemoized'. For single-threaded usage, you can use 'memoizeRef' to +-- create a value. If you need guarantees that only one thread will run the +-- action at a time, use 'memoizeMVar'. +-- +-- Note that this type provides a 'Show' instance for convenience, but not +-- useful information can be provided. +newtype Memoized a = Memoized (IO a) + deriving (Functor, A.Applicative, Monad) +instance Show (Memoized a) where + show _ = "<>" + +-- | Extract a value from a 'Memoized', running an action if no cached value is +-- available. +runMemoized :: MonadIO m => Memoized a -> m a +runMemoized (Memoized m) = liftIO m +{-# INLINE runMemoized #-} + +-- | Create a new 'Memoized' value using an 'IORef' under the surface. Note that +-- the action may be run in multiple threads simultaneously, so this may not be +-- thread safe (depending on the underlying action). +memoizeRef :: IO a -> IO (Memoized a) +memoizeRef action = do + ref <- newIORef Nothing + pure $ Memoized $ do + mres <- readIORef ref + res <- + case mres of + Just res -> pure res + Nothing -> do + res <- try @SomeException action + writeIORef ref $ Just res + pure res + either throwIO pure res diff --git a/src/Codec/Xlsx/Parser/Stream.hs b/src/Codec/Xlsx/Parser/Stream.hs new file mode 100644 index 00000000..5229c25b --- /dev/null +++ b/src/Codec/Xlsx/Parser/Stream.hs @@ -0,0 +1,708 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PackageImports #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StrictData #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UndecidableInstances #-} + +-- | +-- Module : Codex.Xlsx.Parser.Stream +-- Description : Stream parser for xlsx files +-- Copyright : +-- (c) Adam, 2021 +-- (c) Supercede, 2021 +-- License : MIT +-- Stability : experimental +-- Portability : POSIX +-- +-- Parse @.xlsx@ sheets in constant memory. +-- +-- All actions on an xlsx file run inside the 'XlsxM' monad, and must +-- be run with 'runXlsxM'. XlsxM is not a monad transformer, a design +-- inherited from the "zip" package's ZipArchive monad. +-- +-- Inside the XlsxM monad, you can stream 'SheetItem's (a row) from a +-- particular sheet, using 'readSheetByIndex', which is callback-based and tied to IO. +-- +module Codec.Xlsx.Parser.Stream + ( XlsxM + , runXlsxM + , WorkbookInfo(..) + , SheetInfo(..) + , wiSheets + , getWorkbookInfo + , CellRow + , readSheet + , countRowsInSheet + , collectItems + -- ** Index + , SheetIndex + , makeIndex + , makeIndexFromName + -- ** SheetItem + , SheetItem(..) + , si_sheet_index + , si_row + -- ** Row + , Row(..) + , ri_row_index + , ri_cell_row + -- * Errors + , SheetErrors(..) + , AddCellErrors(..) + , CoordinateErrors(..) + , TypeError(..) + , WorkbookError(..) + ) where + +import qualified "zip" Codec.Archive.Zip as Zip +import Codec.Xlsx.Types.Cell +import Codec.Xlsx.Types.Common +import Codec.Xlsx.Types.Internal (RefId (..)) +import Codec.Xlsx.Types.Internal.Relationships (Relationship (..), + Relationships (..)) +import Conduit (PrimMonad, (.|)) +import qualified Conduit as C +import qualified Data.Vector as V +#ifdef USE_MICROLENS +import Lens.Micro +import Lens.Micro.GHC () +import Lens.Micro.Mtl +import Lens.Micro.Platform +import Lens.Micro.TH +#else +import Control.Lens +#endif +import Codec.Xlsx.Parser.Internal +import Control.Monad.Catch +import Control.Monad.Except +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Bifunctor +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Data.Conduit (ConduitT) +import qualified Data.DList as DL +import Data.Foldable +import Data.IORef +import Data.IntMap.Strict (IntMap) +import qualified Data.IntMap.Strict as IntMap +import qualified Data.Map.Strict as M +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as LT +import qualified Data.Text.Lazy.Builder as TB +import qualified Data.Text.Read as Read +import Data.Traversable (for) +import Data.XML.Types +import GHC.Generics +import Control.DeepSeq +import Codec.Xlsx.Parser.Internal.Memoize + +import qualified Codec.Xlsx.Parser.Stream.HexpatInternal as HexpatInternal +import Control.Monad.Base +import Control.Monad.Trans.Control +import Text.XML.Expat.Internal.IO as Hexpat +import Text.XML.Expat.SAX as Hexpat + +#ifdef USE_MICROLENS +(<>=) :: (MonadState s m, Monoid a) => ASetter' s a -> a -> m () +l <>= a = modify (l <>~ a) +#else +#endif + +type CellRow = IntMap Cell + +-- | Sheet item +-- +-- The current sheet at a time, every sheet is constructed of these items. +data SheetItem = MkSheetItem + { _si_sheet_index :: Int -- ^ The sheet number + , _si_row :: ~Row + } deriving stock (Generic, Show) + deriving anyclass NFData + +data Row = MkRow + { _ri_row_index :: Int -- ^ Row number + , _ri_cell_row :: ~CellRow -- ^ Row itself + } deriving stock (Generic, Show) + deriving anyclass NFData + +makeLenses 'MkSheetItem +makeLenses 'MkRow + +type SharedStringsMap = V.Vector Text + +-- | Type of the excel value +-- +-- Note: Some values are untyped and rules of their type resolution are not known. +-- They may be treated simply as strings as well as they may be context-dependent. +-- By far we do not bother with it. +data ExcelValueType + = TS -- ^ shared string + | TStr -- ^ either an inline string ("inlineStr") or a formula string ("str") + | TN -- ^ number + | TB -- ^ boolean + | TE -- ^ excell error, the sheet can contain error values, for example if =1/0, causes division by zero + | Untyped -- ^ Not all values have types + deriving stock (Generic, Show) + +-- | State for parsing sheets +data SheetState = MkSheetState + { _ps_row :: ~CellRow -- ^ Current row + , _ps_sheet_index :: Int -- ^ Current sheet ID (AKA 'sheetInfoSheetId') + , _ps_cell_row_index :: Int -- ^ Current row number + , _ps_cell_col_index :: Int -- ^ Current column number + , _ps_cell_style :: Maybe Int + , _ps_is_in_val :: Bool -- ^ Flag for indexing wheter the parser is in value or not + , _ps_shared_strings :: SharedStringsMap -- ^ Shared string map + , _ps_type :: ExcelValueType -- ^ The last detected value type + + , _ps_text_buf :: Text + -- ^ for hexpat only, which can break up char data into multiple events + , _ps_worksheet_ended :: Bool + -- ^ For hexpat only, which can throw errors right at the end of the sheet + -- rather than ending gracefully. + } deriving stock (Generic, Show) +makeLenses 'MkSheetState + +-- | State for parsing shared strings +data SharedStringsState = MkSharedStringsState + { _ss_string :: TB.Builder -- ^ String we are parsing + , _ss_list :: DL.DList Text -- ^ list of shared strings + } deriving stock (Generic, Show) +makeLenses 'MkSharedStringsState + +type HasSheetState = MonadState SheetState +type HasSharedStringsState = MonadState SharedStringsState + +-- | Represents sheets from the workbook.xml file. E.g. +-- HexpatEvent -> m (Maybe Text) +parseSharedStrings = \case + StartElement "t" _ -> Nothing <$ (ss_string .= mempty) + EndElement "t" -> Just . LT.toStrict . TB.toLazyText <$> gets _ss_string + CharacterData txt -> Nothing <$ (ss_string <>= TB.fromText txt) + _ -> pure Nothing + +-- | Run a series of actions on an Xlsx file +runXlsxM :: MonadIO m => FilePath -> XlsxM a -> m a +runXlsxM xlsxFile (XlsxM act) = liftIO $ do + -- TODO: don't run the withArchive multiple times but use liftWith or runInIO instead + _xs_workbook_info <- memoizeRef (Zip.withArchive xlsxFile readWorkbookInfo) + _xs_relationships <- memoizeRef (Zip.withArchive xlsxFile readWorkbookRelationships) + _xs_shared_strings <- memoizeRef (Zip.withArchive xlsxFile parseSharedStringss) + Zip.withArchive xlsxFile $ runReaderT act $ MkXlsxMState{..} + +liftZip :: Zip.ZipArchive a -> XlsxM a +liftZip = XlsxM . ReaderT . const + +parseSharedStringss :: Zip.ZipArchive (V.Vector Text) +parseSharedStringss = do + sharedStrsSel <- Zip.mkEntrySelector "xl/sharedStrings.xml" + hasSharedStrs <- Zip.doesEntryExist sharedStrsSel + if not hasSharedStrs + then pure mempty + else do + let state0 = initialSharedStrings + byteSrc <- Zip.getEntrySource sharedStrsSel + st <- liftIO $ runExpat state0 byteSrc $ \evs -> forM_ evs $ \ev -> do + mTxt <- parseSharedStrings ev + for_ mTxt $ \txt -> + ss_list %= (`DL.snoc` txt) + pure $ V.fromList $ DL.toList $ _ss_list st + +{-# SCC getOrParseSharedStringss #-} +getOrParseSharedStringss :: XlsxM (V.Vector Text) +getOrParseSharedStringss = runMemoized =<< asks _xs_shared_strings + +readWorkbookInfo :: Zip.ZipArchive WorkbookInfo +readWorkbookInfo = do + sel <- Zip.mkEntrySelector "xl/workbook.xml" + src <- Zip.getEntrySource sel + sheets <- liftIO $ runExpat [] src $ \evs -> forM_ evs $ \case + StartElement ("sheet" :: ByteString) attrs -> do + nm <- lookupBy "name" attrs + sheetId <- lookupBy "sheetId" attrs + rId <- lookupBy "r:id" attrs + sheetNum <- either (throwM . ParseDecimalError sheetId) pure $ eitherDecimal sheetId + modify' (SheetInfo nm (RefId rId) sheetNum :) + _ -> pure () + pure $ WorkbookInfo sheets + +lookupBy :: MonadThrow m => ByteString -> [(ByteString, Text)] -> m Text +lookupBy fields attrs = maybe (throwM $ LookupError attrs fields) pure $ lookup fields attrs + +-- | Returns information about the workbook, found in +-- xl/workbook.xml. The result is cached so the XML will only be +-- decompressed and parsed once inside a larger XlsxM action. +getWorkbookInfo :: XlsxM WorkbookInfo +getWorkbookInfo = runMemoized =<< asks _xs_workbook_info + +readWorkbookRelationships :: Zip.ZipArchive Relationships +readWorkbookRelationships = do + sel <- Zip.mkEntrySelector "xl/_rels/workbook.xml.rels" + src <- Zip.getEntrySource sel + liftIO $ fmap Relationships $ runExpat mempty src $ \evs -> forM_ evs $ \case + StartElement ("Relationship" :: ByteString) attrs -> do + rId <- lookupBy "Id" attrs + rTarget <- lookupBy "Target" attrs + rType <- lookupBy "Type" attrs + modify' $ M.insert (RefId rId) $ + Relationship { relType = rType, + relTarget = T.unpack rTarget + } + _ -> pure () + +-- | Gets relationships for the workbook (this means the filenames in +-- the relationships map are relative to "xl/" base path within the +-- zip file. +-- +-- The relationships xml file will only be parsed once when called +-- multiple times within a larger XlsxM action. +getWorkbookRelationships :: XlsxM Relationships +getWorkbookRelationships = runMemoized =<< asks _xs_relationships + +type HexpatEvent = SAXEvent ByteString Text + +relIdToEntrySelector :: RefId -> XlsxM (Maybe Zip.EntrySelector) +relIdToEntrySelector rid = do + Relationships rels <- getWorkbookRelationships + for (M.lookup rid rels) $ \rel -> do + Zip.mkEntrySelector $ "xl/" <> relTarget rel + +sheetIdToRelId :: Int -> XlsxM (Maybe RefId) +sheetIdToRelId sheetId = do + WorkbookInfo sheets <- getWorkbookInfo + pure $ sheetInfoRelId <$> find ((== sheetId) . sheetInfoSheetId) sheets + +sheetIdToEntrySelector :: Int -> XlsxM (Maybe Zip.EntrySelector) +sheetIdToEntrySelector sheetId = do + sheetIdToRelId sheetId >>= \case + Nothing -> pure Nothing + Just rid -> relIdToEntrySelector rid + +-- If the given sheet number exists, returns Just a conduit source of the stream +-- of XML events in a particular sheet. Returns Nothing when the sheet doesn't +-- exist. +{-# SCC getSheetXmlSource #-} +getSheetXmlSource :: + (PrimMonad m, MonadThrow m, C.MonadResource m) => + Int -> + XlsxM (Maybe (ConduitT () ByteString m ())) +getSheetXmlSource sheetId = do + -- TODO: The Zip library may throw exceptions that aren't exposed from this + -- module, so downstream library users would need to add the 'zip' package to + -- handle them. Consider re-wrapping zip library exceptions, or just + -- re-export them? + mSheetSel <- sheetIdToEntrySelector sheetId + sheetExists <- maybe (pure False) (liftZip . Zip.doesEntryExist) mSheetSel + case mSheetSel of + Just sheetSel + | sheetExists -> + Just <$> liftZip (Zip.getEntrySource sheetSel) + _ -> pure Nothing + +{-# SCC runExpat #-} +runExpat :: forall state tag text. + (GenericXMLString tag, GenericXMLString text) => + state -> + ConduitT () ByteString (C.ResourceT IO) () -> + ([SAXEvent tag text] -> StateT state IO ()) -> + IO state +runExpat initialState byteSource handler = do + -- Set up state + ref <- newIORef initialState + -- Set up parser and callbacks + (parseChunk, _getLoc) <- Hexpat.hexpatNewParser Nothing Nothing False + let noExtra _ offset = pure ((), offset) + {-# SCC processChunk #-} + {-# INLINE processChunk #-} + processChunk isFinalChunk chunk = do + (buf, len, mError) <- parseChunk chunk isFinalChunk + saxen <- HexpatInternal.parseBuf buf len noExtra + case mError of + Just err -> error $ "expat error: " <> show err + Nothing -> do + state0 <- liftIO $ readIORef ref + state1 <- + {-# SCC "runExpat_runStateT_call" #-} + execStateT (handler $ map fst saxen) state0 + writeIORef ref state1 + C.runConduitRes $ + byteSource .| + C.awaitForever (liftIO . processChunk False) + processChunk True BS.empty + readIORef ref + +runExpatForSheet :: + SheetState -> + ConduitT () ByteString (C.ResourceT IO) () -> + (SheetItem -> IO ()) -> + XlsxM () +runExpatForSheet initState byteSource inner = + void $ liftIO $ runExpat initState byteSource handler + where + sheetName = _ps_sheet_index initState + handler evs = forM_ evs $ \ev -> do + parseRes <- runExceptT $ matchHexpatEvent ev + case parseRes of + Left err -> throwM err + Right (Just cellRow) + | not (IntMap.null cellRow) -> do + rowNum <- use ps_cell_row_index + liftIO $ inner $ MkSheetItem sheetName $ MkRow rowNum cellRow + _ -> pure () + +-- | this will collect the sheetitems in a list. +-- useful for cases were memory is of no concern but a sheetitem +-- type in a list is needed. +collectItems :: + SheetIndex -> + XlsxM [SheetItem] +collectItems sheetId = do + res <- liftIO $ newIORef [] + void $ readSheet sheetId $ \item -> + liftIO (modifyIORef' res (item :)) + fmap reverse $ liftIO $ readIORef res + +-- | datatype representing a sheet index, looking it up by name +-- can be done with 'makeIndexFromName', which is the preferred approach. +-- although 'makeIndex' is available in case it's already known. +newtype SheetIndex = MkSheetIndex Int + deriving newtype NFData + +-- | This does *no* checking if the index exists or not. +-- you could have index out of bounds issues because of this. +makeIndex :: Int -> SheetIndex +makeIndex = MkSheetIndex + +-- | Look up the index of a case insensitive sheet name +makeIndexFromName :: Text -> XlsxM (Maybe SheetIndex) +makeIndexFromName sheetName = do + wi <- getWorkbookInfo + -- The Excel UI does not allow a user to create two sheets whose + -- names differ only in alphabetic case (at least for ascii...) + let sheetNameCI = T.toLower sheetName + findRes :: Maybe SheetInfo + findRes = find ((== sheetNameCI) . T.toLower . sheetInfoName) $ _wiSheets wi + pure $ makeIndex . sheetInfoSheetId <$> findRes + +readSheet :: + SheetIndex -> + -- | Function to consume the sheet's rows + (SheetItem -> IO ()) -> + -- | Returns False if sheet doesn't exist, or True otherwise + XlsxM Bool +readSheet (MkSheetIndex sheetId) inner = do + mSrc :: Maybe (ConduitT () ByteString (C.ResourceT IO) ()) <- + getSheetXmlSource sheetId + let + case mSrc of + Nothing -> pure False + Just sourceSheetXml -> do + sharedStrs <- getOrParseSharedStringss + let sheetState0 = initialSheetState + & ps_shared_strings .~ sharedStrs + & ps_sheet_index .~ sheetId + runExpatForSheet sheetState0 sourceSheetXml inner + pure True + +-- | Returns number of rows in the given sheet (identified by the +-- sheet's ID, AKA the sheetId attribute, AKA 'sheetInfoSheetId'), or Nothing +-- if the sheet does not exist. Does not perform a full parse of the +-- XML into 'SheetItem's, so it should be more efficient than counting +-- via 'readSheetByIndex'. +countRowsInSheet :: SheetIndex -> XlsxM (Maybe Int) +countRowsInSheet (MkSheetIndex sheetId) = do + mSrc :: Maybe (ConduitT () ByteString (C.ResourceT IO) ()) <- + getSheetXmlSource sheetId + for mSrc $ \sourceSheetXml -> do + liftIO $ runExpat @Int @ByteString @ByteString 0 sourceSheetXml $ \evs -> + forM_ evs $ \case + StartElement "row" _ -> modify' (+1) + _ -> pure () + +-- | Return row from the state and empty it +popRow :: HasSheetState m => m CellRow +popRow = do + row <- use ps_row + ps_row .= mempty + pure row + +data AddCellErrors + = ReadError -- ^ Could not read current cell value + Text -- ^ Original value + String -- ^ Error message + | SharedStringsNotFound -- ^ Could not find string by index in shared string table + Int -- ^ Given index + (V.Vector Text) -- ^ Given shared strings to lookup in + deriving Show + +-- | Parse the given value +-- +-- If it's a string, we try to get it our of a shared string table +{-# SCC parseValue #-} +parseValue :: SharedStringsMap -> Text -> ExcelValueType -> Either AddCellErrors CellValue +parseValue sstrings txt = \case + TS -> do + (idx, _) <- ReadError txt `first` Read.decimal @Int txt + string <- maybe (Left $ SharedStringsNotFound idx sstrings) Right $ {-# SCC "sstrings_lookup_scc" #-} (sstrings ^? ix idx) + Right $ CellText string + TStr -> pure $ CellText txt + TN -> bimap (ReadError txt) (CellDouble . fst) $ Read.double txt + TE -> bimap (ReadError txt) (CellError . fst) $ fromAttrVal txt + TB | txt == "1" -> Right $ CellBool True + | txt == "0" -> Right $ CellBool False + | otherwise -> Left $ ReadError txt "Could not read Excel boolean value (expected 0 or 1)" + Untyped -> Right (parseUntypedValue txt) + +-- TODO: some of the cells are untyped and we need to test whether +-- they all are strings or something more complicated +parseUntypedValue :: Text -> CellValue +parseUntypedValue = CellText + +-- | Adds a cell to row in state monad +{-# SCC addCellToRow #-} +addCellToRow + :: ( MonadError SheetErrors m + , HasSheetState m + ) + => Text -> m () +addCellToRow txt = do + st <- get + style <- use ps_cell_style + when (_ps_is_in_val st) $ do + val <- liftEither $ first ParseCellError $ parseValue (_ps_shared_strings st) txt (_ps_type st) + put $ st { _ps_row = IntMap.insert (_ps_cell_col_index st) + (Cell { _cellStyle = style + , _cellValue = Just val + , _cellComment = Nothing + , _cellFormula = Nothing + }) $ _ps_row st} + +data SheetErrors + = ParseCoordinateError CoordinateErrors -- ^ Error while parsing coordinates + | ParseTypeError TypeError -- ^ Error while parsing types + | ParseCellError AddCellErrors -- ^ Error while parsing cells + | ParseStyleErrors StyleError + | HexpatParseError Hexpat.XMLParseError + deriving stock Show + deriving anyclass Exception + +type SheetValue = (ByteString, Text) +type SheetValues = [SheetValue] + +data CoordinateErrors + = CoordinateNotFound SheetValues -- ^ If the coordinate was not specified in "r" attribute + | NoListElement SheetValue SheetValues -- ^ If the value is empty for some reason + | NoTextContent Content SheetValues -- ^ If the value has something besides @ContentText@ inside + | DecodeFailure Text SheetValues -- ^ If malformed coordinate text was passed + deriving stock Show + deriving anyclass Exception + +data TypeError + = TypeNotFound SheetValues + | TypeNoListElement SheetValue SheetValues + | UnkownType Text SheetValues + | TypeNoTextContent Content SheetValues + deriving Show + deriving anyclass Exception + +data WorkbookError = LookupError { lookup_attrs :: [(ByteString, Text)], lookup_field :: ByteString } + | ParseDecimalError Text String + deriving Show + deriving anyclass Exception + +{-# SCC matchHexpatEvent #-} +matchHexpatEvent :: + ( MonadError SheetErrors m, + HasSheetState m + ) => + HexpatEvent -> + m (Maybe CellRow) +matchHexpatEvent ev = case ev of + CharacterData txt -> {-# SCC "handle_CharData" #-} do + inVal <- use ps_is_in_val + when inVal $ + {-# SCC "append_text_buf" #-} (ps_text_buf <>= txt) + pure Nothing + StartElement "c" attrs -> Nothing <$ (setCoord attrs *> setType attrs *> setStyle attrs) + StartElement "is" _ -> Nothing <$ (ps_is_in_val .= True) + EndElement "is" -> Nothing <$ finaliseCellValue + StartElement "v" _ -> Nothing <$ (ps_is_in_val .= True) + EndElement "v" -> Nothing <$ finaliseCellValue + -- If beginning of row, empty the state and return nothing. + -- We don't know if there is anything in the state, the user may have + -- decided to (not closing). In any case it's the beginning of a new row + -- so we clear the state. + StartElement "row" _ -> Nothing <$ popRow + -- If at the end of the row, we have collected the whole row into + -- the current state. Empty the state and return the row. + EndElement "row" -> Just <$> popRow + StartElement "worksheet" _ -> ps_worksheet_ended .= False >> pure Nothing + EndElement "worksheet" -> ps_worksheet_ended .= True >> pure Nothing + -- Skip everything else, e.g. the formula elements + FailDocument err -> do + -- this event is emitted at the end the xml stream (possibly + -- because the xml files in xlsx archives don't end in a + -- newline, but that's a guess), so we use state to determine if + -- it's expected. + finished <- use ps_worksheet_ended + unless finished $ + throwError $ HexpatParseError err + pure Nothing + _ -> pure Nothing + +{-# INLINE finaliseCellValue #-} +finaliseCellValue :: + ( MonadError SheetErrors m, HasSheetState m ) => m () +finaliseCellValue = do + txt <- gets _ps_text_buf + addCellToRow txt + modify' $ \st -> + st { _ps_is_in_val = False + , _ps_text_buf = mempty + } + +-- | Update state coordinates accordingly to @parseCoordinates@ +{-# SCC setCoord #-} +setCoord + :: ( MonadError SheetErrors m + , HasSheetState m + ) + => SheetValues -> m () +setCoord list = do + coordinates <- liftEither $ first ParseCoordinateError $ parseCoordinates list + ps_cell_col_index .= (coordinates ^. _2) + ps_cell_row_index .= (coordinates ^. _1) + +-- | Parse type from values and update state accordingly +setType + :: ( MonadError SheetErrors m + , HasSheetState m + ) + => SheetValues -> m () +setType list = do + type' <- liftEither $ first ParseTypeError $ parseType list + ps_type .= type' + +-- | Find sheet value by its name +findName :: ByteString -> SheetValues -> Maybe SheetValue +findName name = find ((name ==) . fst) +{-# INLINE findName #-} + +setStyle :: (MonadError SheetErrors m, HasSheetState m) => SheetValues -> m () +setStyle list = do + style <- liftEither $ first ParseStyleErrors $ parseStyle list + ps_cell_style .= style + +data StyleError = InvalidStyleRef { seInput:: Text, seErrorMsg :: String} + deriving Show + +parseStyle :: SheetValues -> Either StyleError (Maybe Int) +parseStyle list = + case findName "s" list of + Nothing -> pure Nothing + Just (_nm, valTex) -> case Read.decimal valTex of + Left err -> Left (InvalidStyleRef valTex err) + Right (i, _rem) -> pure $ Just i + +-- | Parse value type +{-# SCC parseType #-} +parseType :: SheetValues -> Either TypeError ExcelValueType +parseType list = + case findName "t" list of + Nothing -> pure Untyped + Just (_nm, valText)-> + case valText of + "n" -> Right TN + "s" -> Right TS + -- "Cell containing a formula string". Probably shouldn't be TStr.. + "str" -> Right TStr + "inlineStr" -> Right TStr + "b" -> Right TB + "e" -> Right TE + other -> Left $ UnkownType other list + +-- | Parse coordinates from a list of xml elements if such were found on "r" key +{-# SCC parseCoordinates #-} +parseCoordinates :: SheetValues -> Either CoordinateErrors (Int, Int) +parseCoordinates list = do + (_nm, valText) <- maybe (Left $ CoordinateNotFound list) Right $ findName "r" list + maybe (Left $ DecodeFailure valText list) Right $ fromSingleCellRef $ CellRef valText diff --git a/src/Codec/Xlsx/Parser/Stream/HexpatInternal.hs b/src/Codec/Xlsx/Parser/Stream/HexpatInternal.hs new file mode 100644 index 00000000..d8f669fe --- /dev/null +++ b/src/Codec/Xlsx/Parser/Stream/HexpatInternal.hs @@ -0,0 +1,95 @@ +{- +Under BSD 3-Clause license, (c) 2009 Doug Beardsley , (c) 2009-2012 Stephen Blackheath , (c) 2009 Gregory Collins, (c) 2008 Evan Martin , (c) 2009 Matthew Pocock , (c) 2007-2009 Galois Inc., (c) 2010 Kevin Jardine, (c) 2012 Simon Hengel + +From https://hackage.haskell.org/package/hexpat-0.20.13 + https://github.com/the-real-blackh/hexpat/blob/master/Text/XML/Expat/SAX.hs#L227 + +copied over because the upstream library doesn't expose this function. +-} +module Codec.Xlsx.Parser.Stream.HexpatInternal (parseBuf) where + +import Control.Monad +import Text.XML.Expat.SAX +import qualified Data.ByteString.Internal as I +import Data.Bits +import Data.Int +import Data.ByteString.Internal (c_strlen) +import Data.Word +import Foreign.C +import Foreign.ForeignPtr +import Foreign.Ptr +import Foreign.Storable + +{-# SCC parseBuf #-} +parseBuf :: (GenericXMLString tag, GenericXMLString text) => + ForeignPtr Word8 -> CInt -> (Ptr Word8 -> Int -> IO (a, Int)) -> IO [(SAXEvent tag text, a)] +parseBuf buf _ processExtra = withForeignPtr buf $ \pBuf -> doit [] pBuf 0 + where + roundUp32 offset = (offset + 3) .&. complement 3 + doit acc pBuf offset0 = offset0 `seq` do + typ <- peek (pBuf `plusPtr` offset0 :: Ptr Word32) + (a, offset) <- processExtra pBuf (offset0 + 4) + case typ of + 0 -> return (reverse acc) + 1 -> do + nAtts <- peek (pBuf `plusPtr` offset :: Ptr Word32) + let pName = pBuf `plusPtr` (offset + 4) + lName <- fromIntegral <$> c_strlen pName + let name = gxFromByteString $ I.fromForeignPtr buf (offset + 4) lName + (atts, offset') <- foldM (\(atts, offset) _ -> do + let pAtt = pBuf `plusPtr` offset + lAtt <- fromIntegral <$> c_strlen pAtt + let att = gxFromByteString $ I.fromForeignPtr buf offset lAtt + offset' = offset + lAtt + 1 + pValue = pBuf `plusPtr` offset' + lValue <- fromIntegral <$> c_strlen pValue + let value = gxFromByteString $ I.fromForeignPtr buf offset' lValue + return ((att, value):atts, offset' + lValue + 1) + ) ([], offset + 4 + lName + 1) [1,3..nAtts] + doit ((StartElement name (reverse atts), a) : acc) pBuf (roundUp32 offset') + 2 -> do + let pName = pBuf `plusPtr` offset + lName <- fromIntegral <$> c_strlen pName + let name = gxFromByteString $ I.fromForeignPtr buf offset lName + offset' = offset + lName + 1 + doit ((EndElement name, a) : acc) pBuf (roundUp32 offset') + 3 -> do + len <- fromIntegral <$> peek (pBuf `plusPtr` offset :: Ptr Word32) + let text = gxFromByteString $ I.fromForeignPtr buf (offset + 4) len + offset' = offset + 4 + len + doit ((CharacterData text, a) : acc) pBuf (roundUp32 offset') + 4 -> do + let pEnc = pBuf `plusPtr` offset + lEnc <- fromIntegral <$> c_strlen pEnc + let enc = gxFromByteString $ I.fromForeignPtr buf offset lEnc + offset' = offset + lEnc + 1 + pVer = pBuf `plusPtr` offset' + pVerFirst <- peek (castPtr pVer :: Ptr Word8) + (mVer, offset'') <- case pVerFirst of + 0 -> return (Nothing, offset' + 1) + 1 -> do + lVer <- fromIntegral <$> c_strlen (pVer `plusPtr` 1) + return (Just $ gxFromByteString $ I.fromForeignPtr buf (offset' + 1) lVer, offset' + 1 + lVer + 1) + _ -> error "hexpat: bad data from C land" + cSta <- peek (pBuf `plusPtr` offset'' :: Ptr Int8) + let sta = if cSta < 0 then Nothing else + if cSta == 0 then Just False else + Just True + doit ((XMLDeclaration enc mVer sta, a) : acc) pBuf (roundUp32 (offset'' + 1)) + 5 -> doit ((StartCData, a) : acc) pBuf offset + 6 -> doit ((EndCData, a) : acc) pBuf offset + 7 -> do + let pTarget = pBuf `plusPtr` offset + lTarget <- fromIntegral <$> c_strlen pTarget + let target = gxFromByteString $ I.fromForeignPtr buf offset lTarget + offset' = offset + lTarget + 1 + pData = pBuf `plusPtr` offset' + lData <- fromIntegral <$> c_strlen pData + let dat = gxFromByteString $ I.fromForeignPtr buf offset' lData + doit ((ProcessingInstruction target dat, a) : acc) pBuf (roundUp32 (offset' + lData + 1)) + 8 -> do + let pText = pBuf `plusPtr` offset + lText <- fromIntegral <$> c_strlen pText + let text = gxFromByteString $ I.fromForeignPtr buf offset lText + doit ((Comment text, a) : acc) pBuf (roundUp32 (offset + lText + 1)) + _ -> error "hexpat: bad data from C land" diff --git a/src/Codec/Xlsx/Types.hs b/src/Codec/Xlsx/Types.hs index a185de28..2a7a4a01 100644 --- a/src/Codec/Xlsx/Types.hs +++ b/src/Codec/Xlsx/Types.hs @@ -4,6 +4,8 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE RankNTypes #-} + module Codec.Xlsx.Types ( -- * The main types Xlsx(..) @@ -48,6 +50,10 @@ module Codec.Xlsx.Types ( , Cell.cellStyle , Cell.cellComment , Cell.cellFormula + -- ** Row properties + , rowHeightLens + , _CustomHeight + , _AutomaticHeight -- * Style helpers , emptyStyles , renderStyleSheet @@ -64,8 +70,9 @@ module Codec.Xlsx.Types ( import Control.Exception (SomeException, toException) #ifdef USE_MICROLENS import Lens.Micro.TH +import Data.Profunctor(dimap) +import Data.Profunctor.Choice #else -import Control.Lens.TH #endif import Control.DeepSeq (NFData) import qualified Data.ByteString.Lazy as L @@ -99,6 +106,12 @@ import Codec.Xlsx.Types.StyleSheet as X import Codec.Xlsx.Types.Table as X import Codec.Xlsx.Types.Variant as X import Codec.Xlsx.Writer.Internal +#ifdef USE_MICROLENS +import Lens.Micro +#else +import Control.Lens (lens, Lens', makeLenses) +import Control.Lens.TH (makePrisms) +#endif -- | Height of a row in points (1/72in) data RowHeight @@ -109,6 +122,40 @@ data RowHeight deriving (Eq, Ord, Show, Read, Generic) instance NFData RowHeight +#ifdef USE_MICROLENS +-- Since micro-lens denies the existence of prisms, +-- I pasted the splice that's generated from makePrisms, +-- then I copied over the definitions from Control.Lens for the prism +-- function as well. +type Prism s t a b = forall p f. (Choice p, Applicative f) => p a (f b) -> p s (f t) +type Prism' s a = Prism s s a a + +prism :: (b -> t) -> (s -> Either t a) -> Prism s t a b +prism bt seta = dimap seta (either pure (fmap bt)) . right' + +_CustomHeight :: Prism' RowHeight Double +_CustomHeight + = (prism (\ x1_a4xgd -> CustomHeight x1_a4xgd)) + (\ x_a4xge + -> case x_a4xge of + CustomHeight y1_a4xgf -> Right y1_a4xgf + _ -> Left x_a4xge) +{-# INLINE _CustomHeight #-} + +_AutomaticHeight :: Prism' RowHeight Double +_AutomaticHeight + = (prism (\ x1_a4xgg -> AutomaticHeight x1_a4xgg)) + (\ x_a4xgh + -> case x_a4xgh of + AutomaticHeight y1_a4xgi -> Right y1_a4xgi + _ -> Left x_a4xgh) +{-# INLINE _AutomaticHeight #-} + +#else +makePrisms ''RowHeight +#endif + + -- | Properties of a row. See §18.3.1.73 "row (Row)" for more details data RowProperties = RowProps { rowHeight :: Maybe RowHeight @@ -120,6 +167,9 @@ data RowProperties = RowProps } deriving (Eq, Ord, Show, Read, Generic) instance NFData RowProperties +rowHeightLens :: Lens' RowProperties (Maybe RowHeight) +rowHeightLens = lens rowHeight $ \x y -> x{rowHeight=y} + instance Default RowProperties where def = RowProps { rowHeight = Nothing , rowStyle = Nothing diff --git a/src/Codec/Xlsx/Types/Common.hs b/src/Codec/Xlsx/Types/Common.hs index a820070f..1a5c9e3d 100644 --- a/src/Codec/Xlsx/Types/Common.hs +++ b/src/Codec/Xlsx/Types/Common.hs @@ -1,6 +1,10 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE RankNTypes #-} + module Codec.Xlsx.Types.Common ( CellRef(..) , singleCellRef @@ -20,6 +24,14 @@ module Codec.Xlsx.Types.Common , dateToNumber , int2col , col2int + -- ** prisms + , _XlsxText + , _XlsxRichText + , _CellText + , _CellDouble + , _CellBool + , _CellRich + , _CellError ) where import GHC.Generics (Generic) @@ -43,6 +55,15 @@ import Text.XML.Cursor import Codec.Xlsx.Parser.Internal import Codec.Xlsx.Types.RichText import Codec.Xlsx.Writer.Internal +#ifdef USE_MICROLENS +import Lens.Micro +import Lens.Micro.Internal +import Lens.Micro.GHC () +import Data.Profunctor.Choice +import Data.Profunctor(dimap) +#else +import Control.Lens(makePrisms) +#endif -- | convert column number (starting from 1) to its textual form (e.g. 3 -> \"C\") int2col :: Int -> Text @@ -170,6 +191,7 @@ data CellValue | CellError ErrorType deriving (Eq, Ord, Show, Generic) + instance NFData CellValue -- | The evaluation of an expression can result in an error having one @@ -408,3 +430,78 @@ instance ToAttrVal ErrorType where toAttrVal ErrorNum = "#NUM!" toAttrVal ErrorRef = "#REF!" toAttrVal ErrorValue = "#VALUE!" + +#ifdef USE_MICROLENS +-- Since micro-lens denies the existence of prisms, +-- I pasted the splice that's generated from makePrisms, +-- then I copied over the definitions from Control.Lens for the prism +-- function as well. +-- Essentially this is doing the template haskell by hand. +type Prism s t a b = forall p f. (Choice p, Applicative f) => p a (f b) -> p s (f t) +type Prism' s a = Prism s s a a + +prism :: (b -> t) -> (s -> Either t a) -> Prism s t a b +prism bt seta = dimap seta (either pure (fmap bt)) . right' + +_CellText :: Prism' CellValue Text +_CellText + = (prism (\ x1_a1ZQv -> CellText x1_a1ZQv)) + (\ x_a1ZQw + -> case x_a1ZQw of + CellText y1_a1ZQx -> Right y1_a1ZQx + _ -> Left x_a1ZQw) +{-# INLINE _CellText #-} +_CellDouble :: Prism' CellValue Double +_CellDouble + = (prism (\ x1_a1ZQy -> CellDouble x1_a1ZQy)) + (\ x_a1ZQz + -> case x_a1ZQz of + CellDouble y1_a1ZQA -> Right y1_a1ZQA + _ -> Left x_a1ZQz) +{-# INLINE _CellDouble #-} +_CellBool :: Prism' CellValue Bool +_CellBool + = (prism (\ x1_a1ZQB -> CellBool x1_a1ZQB)) + (\ x_a1ZQC + -> case x_a1ZQC of + CellBool y1_a1ZQD -> Right y1_a1ZQD + _ -> Left x_a1ZQC) +{-# INLINE _CellBool #-} +_CellRich :: Prism' CellValue [RichTextRun] +_CellRich + = (prism (\ x1_a1ZQE -> CellRich x1_a1ZQE)) + (\ x_a1ZQF + -> case x_a1ZQF of + CellRich y1_a1ZQG -> Right y1_a1ZQG + _ -> Left x_a1ZQF) +{-# INLINE _CellRich #-} +_CellError :: Prism' CellValue ErrorType +_CellError + = (prism (\ x1_a1ZQH -> CellError x1_a1ZQH)) + (\ x_a1ZQI + -> case x_a1ZQI of + CellError y1_a1ZQJ -> Right y1_a1ZQJ + _ -> Left x_a1ZQI) +{-# INLINE _CellError #-} + +_XlsxText :: Prism' XlsxText Text +_XlsxText + = (prism (\ x1_a1ZzU -> XlsxText x1_a1ZzU)) + (\ x_a1ZzV + -> case x_a1ZzV of + XlsxText y1_a1ZzW -> Right y1_a1ZzW + _ -> Left x_a1ZzV) +{-# INLINE _XlsxText #-} +_XlsxRichText :: Prism' XlsxText [RichTextRun] +_XlsxRichText + = (prism (\ x1_a1ZzX -> XlsxRichText x1_a1ZzX)) + (\ x_a1ZzY + -> case x_a1ZzY of + XlsxRichText y1_a1ZzZ -> Right y1_a1ZzZ + _ -> Left x_a1ZzY) +{-# INLINE _XlsxRichText #-} + +#else +makePrisms ''XlsxText +makePrisms ''CellValue +#endif diff --git a/src/Codec/Xlsx/Writer.hs b/src/Codec/Xlsx/Writer.hs index e04124d6..a35762b8 100644 --- a/src/Codec/Xlsx/Writer.hs +++ b/src/Codec/Xlsx/Writer.hs @@ -2,6 +2,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE PackageImports #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE DeriveGeneric #-} -- | This module provides a function for serializing structured `Xlsx` into lazy bytestring @@ -9,7 +10,7 @@ module Codec.Xlsx.Writer ( fromXlsx ) where -import qualified Codec.Archive.Zip as Zip +import qualified "zip-archive" Codec.Archive.Zip as Zip import Control.Arrow (second) #ifdef USE_MICROLENS import Lens.Micro diff --git a/src/Codec/Xlsx/Writer/Internal.hs b/src/Codec/Xlsx/Writer/Internal.hs index 29dbf2ee..4afb97ab 100644 --- a/src/Codec/Xlsx/Writer/Internal.hs +++ b/src/Codec/Xlsx/Writer/Internal.hs @@ -3,6 +3,7 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE DeriveGeneric #-} + module Codec.Xlsx.Writer.Internal ( -- * Rendering documents ToDocument(..) diff --git a/src/Codec/Xlsx/Writer/Internal/Stream.hs b/src/Codec/Xlsx/Writer/Internal/Stream.hs new file mode 100644 index 00000000..93253896 --- /dev/null +++ b/src/Codec/Xlsx/Writer/Internal/Stream.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TemplateHaskell #-} + +-- | Internal stream related functions. +-- These are exported because they're tested like this. +-- It's not expected a user would need this. +module Codec.Xlsx.Writer.Internal.Stream + ( upsertSharedString + , initialSharedString + , string_map + , SharedStringState(..) + ) where + + +#ifdef USE_MICROLENS +import Lens.Micro.Platform +#else +import Control.Lens +#endif +import Control.Monad.State.Strict +import Data.Map.Strict (Map) +import Data.Maybe +import Data.Text (Text) + +newtype SharedStringState = MkSharedStringState + { _string_map :: Map Text Int + } +makeLenses 'MkSharedStringState + +initialSharedString :: SharedStringState +initialSharedString = MkSharedStringState mempty + +-- properties: +-- for a list of [text], every unique text gets a unique number. +upsertSharedString :: MonadState SharedStringState m => Text -> m (Text,Int) +upsertSharedString current = do + strings <- use string_map + + let mIdx :: Maybe Int + mIdx = strings ^? ix current + + idx :: Int + idx = fromMaybe (length strings) mIdx + + newMap :: Map Text Int + newMap = at current ?~ idx $ strings + + string_map .= newMap + pure (current, idx) + diff --git a/src/Codec/Xlsx/Writer/Stream.hs b/src/Codec/Xlsx/Writer/Stream.hs new file mode 100644 index 00000000..ce57600c --- /dev/null +++ b/src/Codec/Xlsx/Writer/Stream.hs @@ -0,0 +1,360 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StrictData #-} +{-# LANGUAGE TemplateHaskell #-} + +-- | Writes Excel files from a stream, which allows creation of +-- large Excel files while remaining in constant memory. +module Codec.Xlsx.Writer.Stream + ( writeXlsx + , writeXlsxWithSharedStrings + , SheetWriteSettings(..) + , defaultSettings + , wsSheetView + , wsZip + , wsColumnProperties + , wsRowProperties + , wsStyles + -- *** Shared strings + , sharedStrings + , sharedStringsStream + ) where + +import Codec.Archive.Zip.Conduit.UnZip +import Codec.Archive.Zip.Conduit.Zip +import Codec.Xlsx.Parser.Internal (n_) +import Codec.Xlsx.Parser.Stream +import Codec.Xlsx.Types (ColumnsProperties (..), RowProperties (..), + Styles (..), _AutomaticHeight, _CustomHeight, + emptyStyles, rowHeightLens) +import Codec.Xlsx.Types.Cell +import Codec.Xlsx.Types.Common +import Codec.Xlsx.Types.Internal.Relationships (odr, pr) +import Codec.Xlsx.Types.SheetViews +import Codec.Xlsx.Writer.Internal (nonEmptyElListSimple, toAttrVal, toElement, + txtd, txti) +import Codec.Xlsx.Writer.Internal.Stream +import Conduit (PrimMonad, yield, (.|)) +import qualified Conduit as C +#ifdef USE_MICROLENS +import Data.Traversable.WithIndex +import Lens.Micro.Platform +#else +import Control.Lens +#endif +import Control.Monad.Catch +import Control.Monad.Reader.Class +import Control.Monad.State.Strict +import Data.ByteString (ByteString) +import Data.ByteString.Builder (Builder) +import Data.Coerce +import Data.Conduit (ConduitT) +import qualified Data.Conduit.List as CL +import Data.Foldable (fold, traverse_) +import Data.List +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map +import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as Text +import Data.Time +import Data.Word +import Data.XML.Types +import Text.Printf +import Text.XML (toXMLElement) +import qualified Text.XML as TXML +import Text.XML.Stream.Render +import Text.XML.Unresolved (elementToEvents) + + +upsertSharedStrings :: MonadState SharedStringState m => Row -> m [(Text,Int)] +upsertSharedStrings row = + traverse upsertSharedString items + where + items :: [Text] + items = row ^.. ri_cell_row . traversed . cellValue . _Just . _CellText + +-- | Process sheetItems into shared strings structure to be put into +-- 'writeXlsxWithSharedStrings' +sharedStrings :: Monad m => ConduitT Row b m (Map Text Int) +sharedStrings = void sharedStringsStream .| CL.foldMap (uncurry Map.singleton) + +-- | creates a unique number for every encountered string in the stream +-- This is used for creating a required structure in the xlsx format +-- called shared strings. Every string get's transformed into a number +-- +-- exposed to allow further processing, we also know the map after processing +-- but I don't think conduit provides a way of getting that out. +-- use 'sharedStrings' to just get the map +sharedStringsStream :: Monad m => + ConduitT Row (Text, Int) m (Map Text Int) +sharedStringsStream = fmap (view string_map) $ C.execStateC initialSharedString $ + CL.mapFoldableM upsertSharedStrings + +-- | Settings for writing a single sheet. +data SheetWriteSettings = MkSheetWriteSettings + { _wsSheetView :: [SheetView] + , _wsZip :: ZipOptions -- ^ Enable zipOpt64=True if you intend writing large xlsx files, zip needs 64bit for files over 4gb. + , _wsColumnProperties :: [ColumnsProperties] + , _wsRowProperties :: Map Int RowProperties + , _wsStyles :: Styles + } +instance Show SheetWriteSettings where + -- ZipOptions lacks a show instance-} + show (MkSheetWriteSettings s _ y r _) = printf "MkSheetWriteSettings{ _wsSheetView=%s, _wsColumnProperties=%s, _wsZip=defaultZipOptions, _wsRowProperties=%s }" (show s) (show y) (show r) +makeLenses ''SheetWriteSettings + +defaultSettings :: SheetWriteSettings +defaultSettings = MkSheetWriteSettings + { _wsSheetView = [] + , _wsColumnProperties = [] + , _wsRowProperties = mempty + , _wsStyles = emptyStyles + , _wsZip = defaultZipOptions { + zipOpt64 = False + -- There is a magick number in the zip archive package, + -- https://hackage.haskell.org/package/zip-archive-0.4.1/docs/src/Codec.Archive.Zip.html#local-6989586621679055672 + -- if we enable 64bit the number doesn't align causing the test to fail. + } + } + + + +-- | Transform a 'Row' stream into a stream that creates the xlsx file format +-- (to be consumed by sinkfile for example) +-- This first runs 'sharedStrings' and then 'writeXlsxWithSharedStrings'. +-- If you want xlsx files this is the most obvious function to use. +-- the others are exposed in case you can cache the shared strings for example. +-- +-- Note that the current implementation concatenates everything into a single sheet. +-- In other words there is no support for writing multiple sheets +writeXlsx :: MonadThrow m + => PrimMonad m + => SheetWriteSettings -- ^ use 'defaultSettings' + -> ConduitT () Row m () -- ^ the conduit producing sheetitems + -> ConduitT () ByteString m Word64 -- ^ result conduit producing xlsx files +writeXlsx settings sheetC = do + sstrings <- sheetC .| sharedStrings + writeXlsxWithSharedStrings settings sstrings sheetC + + +-- TODO maybe should use bimap instead: https://hackage.haskell.org/package/bimap-0.4.0/docs/Data-Bimap.html +-- it guarantees uniqueness of both text and int +-- | This write Excel file with a shared strings lookup table. +-- It appears that it is optional. +-- Failed lookups will result in valid xlsx. +-- There are several conditions on shared strings, +-- +-- 1. Every text to int is unique on both text and int. +-- 2. Every Int should have a gap no greater than 1. [("xx", 3), ("yy", 4)] is okay, whereas [("xx", 3), ("yy", 5)] is not. +-- 3. It's expected this starts from 0. +-- +-- Use 'sharedStringsStream' to get a good shared strings table. +-- This is provided because the user may have a more efficient way of +-- constructing this table than the library can provide, +-- for example through database operations. +writeXlsxWithSharedStrings :: MonadThrow m => PrimMonad m + => SheetWriteSettings + -> Map Text Int -- ^ shared strings table + -> ConduitT () Row m () + -> ConduitT () ByteString m Word64 +writeXlsxWithSharedStrings settings sharedStrings' items = + combinedFiles settings sharedStrings' items .| zipStream (settings ^. wsZip) + +-- massive amount of boilerplate needed for excel to function +boilerplate :: forall m . PrimMonad m => SheetWriteSettings -> Map Text Int -> [(ZipEntry, ZipData m)] +boilerplate settings sharedStrings' = + [ (zipEntry "xl/sharedStrings.xml", ZipDataSource $ writeSst sharedStrings' .| eventsToBS) + , (zipEntry "[Content_Types].xml", ZipDataSource $ writeContentTypes .| eventsToBS) + , (zipEntry "xl/workbook.xml", ZipDataSource $ writeWorkbook .| eventsToBS) + , (zipEntry "xl/styles.xml", ZipDataByteString $ coerce $ settings ^. wsStyles) + , (zipEntry "xl/_rels/workbook.xml.rels", ZipDataSource $ writeWorkbookRels .| eventsToBS) + , (zipEntry "_rels/.rels", ZipDataSource $ writeRootRels .| eventsToBS) + ] + +combinedFiles :: PrimMonad m + => SheetWriteSettings + -> Map Text Int + -> ConduitT () Row m () + -> ConduitT () (ZipEntry, ZipData m) m () +combinedFiles settings sharedStrings' items = + C.yieldMany $ + boilerplate settings sharedStrings' <> + [(zipEntry "xl/worksheets/sheet1.xml", ZipDataSource $ + items .| C.runReaderC settings (writeWorkSheet sharedStrings') .| eventsToBS )] + +el :: Monad m => Name -> Monad m => forall i. ConduitT i Event m () -> ConduitT i Event m () +el x = tag x mempty + +-- Clark notation is used a lot for xml namespaces in this module: +-- +-- Name has an IsString instance which parses it +override :: Monad m => Text -> Text -> forall i. ConduitT i Event m () +override content' part = + tag "{http://schemas.openxmlformats.org/package/2006/content-types}Override" + (attr "ContentType" content' + <> attr "PartName" part) $ pure () + + +-- | required by Excel. +writeContentTypes :: Monad m => forall i. ConduitT i Event m () +writeContentTypes = doc "{http://schemas.openxmlformats.org/package/2006/content-types}Types" $ do + override "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml" "/xl/workbook.xml" + override "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml" "/xl/sharedStrings.xml" + override "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml" "/xl/styles.xml" + override "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml" "/xl/worksheets/sheet1.xml" + override "application/vnd.openxmlformats-package.relationships+xml" "/xl/_rels/workbook.xml.rels" + override "application/vnd.openxmlformats-package.relationships+xml" "/_rels/.rels" + +-- | required by Excel. +writeWorkbook :: Monad m => forall i. ConduitT i Event m () +writeWorkbook = doc (n_ "workbook") $ do + el (n_ "sheets") $ do + tag (n_ "sheet") + (attr "name" "Sheet1" + <> attr "sheetId" "1" <> + attr (odr "id") "rId3") $ + pure () + +doc :: Monad m => Name -> forall i. ConduitT i Event m () -> ConduitT i Event m () +doc root docM = do + yield EventBeginDocument + el root docM + yield EventEndDocument + +relationship :: Monad m => Text -> Int -> Text -> forall i. ConduitT i Event m () +relationship target id' type' = + tag (pr "Relationship") + (attr "Type" type' + <> attr "Id" (Text.pack $ "rId" <> show id') + <> attr "Target" target + ) $ pure () + +writeWorkbookRels :: Monad m => forall i. ConduitT i Event m () +writeWorkbookRels = doc (pr "Relationships") $ do + relationship "sharedStrings.xml" 1 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings" + relationship "worksheets/sheet1.xml" 3 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" + relationship "styles.xml" 2 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" + +writeRootRels :: Monad m => forall i. ConduitT i Event m () +writeRootRels = doc (pr "Relationships") $ + relationship "xl/workbook.xml" 1 "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" + + +zipEntry :: Text -> ZipEntry +zipEntry x = ZipEntry + { zipEntryName = Left x + , zipEntryTime = LocalTime (toEnum 0) midnight + , zipEntrySize = Nothing + , zipEntryExternalAttributes = Nothing + } + +eventsToBS :: PrimMonad m => ConduitT Event ByteString m () +eventsToBS = writeEvents .| C.builderToByteString + +writeSst :: Monad m => Map Text Int -> forall i. ConduitT i Event m () +writeSst sharedStrings' = doc (n_ "sst") $ + void $ traverse (el (n_ "si") . el (n_ "t") . content . fst + ) $ sortBy (\(_, i) (_, y :: Int) -> compare i y) $ Map.toList sharedStrings' + +writeEvents :: PrimMonad m => ConduitT Event Builder m () +writeEvents = renderBuilder (def {rsPretty=False}) + +sheetViews :: forall m . MonadReader SheetWriteSettings m => forall i . ConduitT i Event m () +sheetViews = do + sheetView <- view wsSheetView + + unless (null sheetView) $ el (n_ "sheetViews") $ do + let + view' :: [Element] + view' = setNameSpaceRec spreadSheetNS . toXMLElement . toElement (n_ "sheetView") <$> sheetView + + C.yieldMany $ elementToEvents =<< view' + +spreadSheetNS :: Text +spreadSheetNS = fold $ nameNamespace $ n_ "" + +setNameSpaceRec :: Text -> Element -> Element +setNameSpaceRec space xelm = + xelm {elementName = ((elementName xelm ){nameNamespace = + Just space }) + , elementNodes = elementNodes xelm <&> \case + NodeElement x -> NodeElement (setNameSpaceRec space x) + y -> y + } + +columns :: MonadReader SheetWriteSettings m => ConduitT Row Event m () +columns = do + colProps <- view wsColumnProperties + let cols :: Maybe TXML.Element + cols = nonEmptyElListSimple (n_ "cols") $ map (toElement (n_ "col")) colProps + traverse_ (C.yieldMany . elementToEvents . toXMLElement) cols + +writeWorkSheet :: MonadReader SheetWriteSettings m => Map Text Int -> ConduitT Row Event m () +writeWorkSheet sharedStrings' = doc (n_ "worksheet") $ do + sheetViews + columns + el (n_ "sheetData") $ C.awaitForever (mapRow sharedStrings') + +mapRow :: MonadReader SheetWriteSettings m => Map Text Int -> Row -> ConduitT Row Event m () +mapRow sharedStrings' sheetItem = do + mRowProp <- preview $ wsRowProperties . ix rowIx . rowHeightLens . _Just . failing _CustomHeight _AutomaticHeight + let rowAttr :: Attributes + rowAttr = ixAttr <> fold (attr "ht" . txtd <$> mRowProp) + tag (n_ "row") rowAttr $ + void $ itraverse (mapCell sharedStrings' rowIx) (sheetItem ^. ri_cell_row) + where + rowIx = sheetItem ^. ri_row_index + ixAttr = attr "r" $ toAttrVal rowIx + +mapCell :: Monad m => Map Text Int -> Int -> Int -> Cell -> ConduitT Row Event m () +mapCell sharedStrings' rix cix cell = + when (has (cellValue . _Just) cell || has (cellStyle . _Just) cell) $ + tag (n_ "c") celAttr $ + when (has (cellValue . _Just) cell) $ + el (n_ "v") $ + content $ renderCell sharedStrings' cell + where + celAttr = attr "r" ref <> + renderCellType sharedStrings' cell + <> foldMap (attr "s" . txti) (cell ^. cellStyle) + ref :: Text + ref = coerce $ singleCellRef (rix, cix) + +renderCellType :: Map Text Int -> Cell -> Attributes +renderCellType sharedStrings' cell = + maybe mempty + (attr "t" . renderType sharedStrings') + $ cell ^? cellValue . _Just + +renderCell :: Map Text Int -> Cell -> Text +renderCell sharedStrings' cell = renderValue sharedStrings' val + where + val :: CellValue + val = fromMaybe (CellText mempty) $ cell ^? cellValue . _Just + +renderValue :: Map Text Int -> CellValue -> Text +renderValue sharedStrings' = \case + CellText x -> + -- if we can't find it in the sst, print the string + maybe x toAttrVal $ sharedStrings' ^? ix x + CellDouble x -> toAttrVal x + CellBool b -> toAttrVal b + CellRich _ -> error "rich text is not supported yet" + CellError err -> toAttrVal err + + +renderType :: Map Text Int -> CellValue -> Text +renderType sharedStrings' = \case + CellText x -> + maybe "str" (const "s") $ sharedStrings' ^? ix x + CellDouble _ -> "n" + CellBool _ -> "b" + CellRich _ -> "r" + CellError _ -> "e" diff --git a/stack-lts-12.yaml b/stack-lts-12.yaml index 0843c9d2..4b3d6ae5 100644 --- a/stack-lts-12.yaml +++ b/stack-lts-12.yaml @@ -1,3 +1,6 @@ resolver: lts-12.26 packages: - '.' + +extra-deps: +- zip-stream-0.2.1.0@sha256:9601c2a5addd3edd8ab1f7ac8c3753e92326e1971c6d15d123e621ff8c92e002,1749 diff --git a/stack-lts-14.yaml b/stack-lts-14.yaml index 3732cb9c..9eef10eb 100644 --- a/stack-lts-14.yaml +++ b/stack-lts-14.yaml @@ -1,3 +1,6 @@ resolver: lts-14.27 packages: - '.' + +extra-deps: +- indexed-traversable-0.1.1@sha256:e4602357513aa3b146546227732e6e5698392f4324ab690e109cc05676ba3b4f,2491 diff --git a/test/Main.hs b/test/Main.hs index 2f5dc77a..c57b5360 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,18 +1,9 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE OverloadedLists #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE QuasiQuotes #-} module Main ( main ) where -#ifdef USE_MICROLENS -import Lens.Micro -import Lens.Micro.Mtl -#else -import Control.Lens -#endif import Control.Monad.State.Lazy import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as LB @@ -20,6 +11,7 @@ import Data.Map (Map) import qualified Data.Map as M import Data.Time.Clock.POSIX (POSIXTime) import qualified Data.Vector as V +import qualified StreamTests import Text.RawString.QQ import Text.XML @@ -27,13 +19,13 @@ import Test.Tasty (defaultMain, testGroup) import Test.Tasty.HUnit (testCase) import Test.Tasty.HUnit ((@=?)) +import TestXlsx import Codec.Xlsx import Codec.Xlsx.Formatted import Codec.Xlsx.Types.Internal import Codec.Xlsx.Types.Internal.CommentTable -import Codec.Xlsx.Types.Internal.CustomProperties - as CustomProperties +import Codec.Xlsx.Types.Internal.CustomProperties as CustomProperties import Codec.Xlsx.Types.Internal.SharedStringTable import AutoFilterTests @@ -41,13 +33,14 @@ import Common import CommonTests import CondFmtTests import Diff -import PivotTableTests import DrawingTests +import PivotTableTests main :: IO () main = defaultMain $ testGroup "Tests" - [ testCase "write . read == id" $ do + [ + testCase "write . read == id" $ do let bs = fromXlsx testTime testXlsx LB.writeFile "data-test.xlsx" bs testXlsx @==? toXlsx (fromXlsx testTime testXlsx) @@ -84,535 +77,11 @@ main = defaultMain $ , testCase "toXlsxEither: properly formatted" $ Right testXlsx @==? toXlsxEither (fromXlsx testTime testXlsx) , testCase "toXlsxEither: invalid format" $ - Left InvalidZipArchive @==? toXlsxEither "this is not a valid XLSX file" + Left (InvalidZipArchive "Did not find end of central directory signature") @==? toXlsxEither "this is not a valid XLSX file" , CommonTests.tests , CondFmtTests.tests , PivotTableTests.tests , DrawingTests.tests , AutoFilterTests.tests - ] - -testXlsx :: Xlsx -testXlsx = Xlsx sheets minimalStyles definedNames customProperties DateBase1904 - where - sheets = - [("List1", sheet1), ("Another sheet", sheet2), ("with pivot table", pvSheet)] - sheet1 = Worksheet cols rowProps testCellMap1 drawing ranges - sheetViews pageSetup cFormatting validations [] (Just autoFilter) - tables (Just protection) sharedFormulas - sharedFormulas = - M.fromList - [ (SharedFormulaIndex 0, SharedFormulaOptions (CellRef "A5:C5") (Formula "A4")) - , (SharedFormulaIndex 1, SharedFormulaOptions (CellRef "B6:C6") (Formula "B3+12")) - ] - autoFilter = def & afRef ?~ CellRef "A1:E10" - & afFilterColumns .~ fCols - fCols = M.fromList [ (1, Filters DontFilterByBlank - [FilterValue "a", FilterValue "b",FilterValue "ZZZ"]) - , (2, CustomFiltersAnd (CustomFilter FltrGreaterThanOrEqual "0") - (CustomFilter FltrLessThan "42"))] - tables = - [ Table - { tblName = Just "Table1" - , tblDisplayName = "Table1" - , tblRef = CellRef "A3" - , tblColumns = [TableColumn "another text"] - , tblAutoFilter = Just (def & afRef ?~ CellRef "A3") - } - ] - protection = - fullSheetProtection - { _sprScenarios = False - , _sprLegacyPassword = Just $ legacyPassword "hard password" - } - sheet2 = def & wsCells .~ testCellMap2 - pvSheet = sheetWithPvCells & wsPivotTables .~ [testPivotTable] - sheetWithPvCells = def & wsCells .~ testPivotSrcCells - rowProps = M.fromList [(1, RowProps { rowHeight = Just (CustomHeight 50) - , rowStyle = Just 3 - , rowHidden = False - })] - cols = [ColumnsProperties 1 10 (Just 15) (Just 1) False False False] - drawing = Just $ testDrawing { _xdrAnchors = map resolve $ _xdrAnchors testDrawing } - resolve :: Anchor RefId RefId -> Anchor FileInfo ChartSpace - resolve Anchor {..} = - let obj = - case _anchObject of - Picture {..} -> - let blipFill = (_picBlipFill & bfpImageInfo ?~ fileInfo) - in Picture - { _picMacro = _picMacro - , _picPublished = _picPublished - , _picNonVisual = _picNonVisual - , _picBlipFill = blipFill - , _picShapeProperties = _picShapeProperties - } - Graphic nv _ tr -> - Graphic nv testLineChartSpace tr - in Anchor - { _anchAnchoring = _anchAnchoring - , _anchObject = obj - , _anchClientData = _anchClientData - } - fileInfo = FileInfo "dummy.png" "image/png" "fake contents" - ranges = [mkRange (1,1) (1,2), mkRange (2,2) (10, 5)] - minimalStyles = renderStyleSheet minimalStyleSheet - definedNames = DefinedNames [("SampleName", Nothing, "A10:A20")] - sheetViews = Just [sheetView1, sheetView2] - sheetView1 = def & sheetViewRightToLeft ?~ True - & sheetViewTopLeftCell ?~ CellRef "B5" - sheetView2 = def & sheetViewType ?~ SheetViewTypePageBreakPreview - & sheetViewWorkbookViewId .~ 5 - & sheetViewSelection .~ [ def & selectionActiveCell ?~ CellRef "C2" - & selectionPane ?~ PaneTypeBottomRight - , def & selectionActiveCellId ?~ 1 - & selectionSqref ?~ SqRef [ CellRef "A3:A10" - , CellRef "B1:G3"] - ] - pageSetup = Just $ def & pageSetupBlackAndWhite ?~ True - & pageSetupCopies ?~ 2 - & pageSetupErrors ?~ PrintErrorsDash - & pageSetupPaperSize ?~ PaperA4 - customProperties = M.fromList [("some_prop", VtInt 42)] - cFormatting = M.fromList [(SqRef [CellRef "A1:B3"], rules1), (SqRef [CellRef "C1:C10"], rules2)] - cfRule c d = CfRule { _cfrCondition = c - , _cfrDxfId = Just d - , _cfrPriority = topCfPriority - , _cfrStopIfTrue = Nothing - } - rules1 = [ cfRule ContainsBlanks 1 - , cfRule (ContainsText "foo") 2 - , cfRule (CellIs (OpBetween (Formula "A1") (Formula "B10"))) 3 - ] - rules2 = [ cfRule ContainsErrors 3 ] - -testCellMap1 :: CellMap -testCellMap1 = M.fromList [ ((1, 2), cd1_2), ((1, 5), cd1_5), ((1, 10), cd1_10) - , ((3, 1), cd3_1), ((3, 2), cd3_2), ((3, 3), cd3_3), ((3, 7), cd3_7) - , ((4, 1), cd4_1), ((4, 2), cd4_2), ((4, 3), cd4_3) - , ((5, 1), cd5_1), ((5, 2), cd5_2), ((5, 3), cd5_3) - , ((6, 2), cd6_2), ((6, 3), cd6_3) - ] - where - cd v = def {_cellValue=Just v} - cd1_2 = cd (CellText "just a text, fließen, русский <> и & \"in quotes\"") - cd1_5 = cd (CellDouble 42.4567) - cd1_10 = cd (CellText "") - cd3_1 = cd (CellText "another text") - cd3_2 = def -- shouldn't it be skipped? - cd3_3 = def & cellValue ?~ CellError ErrorDiv0 - & cellFormula ?~ simpleCellFormula "1/0" - cd3_7 = cd (CellBool True) - cd4_1 = cd (CellDouble 1) - cd4_2 = cd (CellDouble 123456789012345) - cd4_3 = (cd (CellDouble (1+2))) { _cellFormula = - Just $ simpleCellFormula "A4+B4<>11" - } - cd5_1 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 0) - cd5_2 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 0) - cd5_3 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 0) - cd6_2 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 1) - cd6_3 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 1) - -testCellMap2 :: CellMap -testCellMap2 = M.fromList [ ((1, 2), def & cellValue ?~ CellText "something here") - , ((3, 5), def & cellValue ?~ CellDouble 123.456) - , ((2, 4), - def & cellValue ?~ CellText "value" - & cellComment ?~ comment1 - ) - , ((10, 7), - def & cellValue ?~ CellText "value" - & cellComment ?~ comment2 - ) - , ((11, 4), def & cellComment ?~ comment3) - ] - where - comment1 = Comment (XlsxText "simple comment") "bob" True - comment2 = Comment (XlsxRichText [rich1, rich2]) "alice" False - comment3 = Comment (XlsxText "comment for an empty cell") "bob" True - rich1 = def & richTextRunText.~ "Look ma!" - & richTextRunProperties ?~ ( - def & runPropertiesBold ?~ True - & runPropertiesFont ?~ "Tahoma") - rich2 = def & richTextRunText .~ "It's blue!" - & richTextRunProperties ?~ ( - def & runPropertiesItalic ?~ True - & runPropertiesColor ?~ (def & colorARGB ?~ "FF000080")) - -testTime :: POSIXTime -testTime = 123 - -fromRight :: Show a => Either a b -> b -fromRight (Right b) = b -fromRight (Left x) = error $ "Right _ was expected but Left " ++ show x ++ " found" - -testStyleSheet :: StyleSheet -testStyleSheet = minimalStyleSheet & styleSheetDxfs .~ [dxf1, dxf2, dxf3] - & styleSheetNumFmts .~ M.fromList [(164, "0.000")] - & styleSheetCellXfs %~ (++ [cellXf1, cellXf2]) - where - dxf1 = def & dxfFont ?~ (def & fontBold ?~ True - & fontSize ?~ 12) - dxf2 = def & dxfFill ?~ (def & fillPattern ?~ (def & fillPatternBgColor ?~ red)) - dxf3 = def & dxfNumFmt ?~ NumFmt 164 "0.000" - red = def & colorARGB ?~ "FFFF0000" - cellXf1 = def - { _cellXfApplyNumberFormat = Just True - , _cellXfNumFmtId = Just 2 } - cellXf2 = def - { _cellXfApplyNumberFormat = Just True - , _cellXfNumFmtId = Just 164 } - - -withSingleUnderline :: SharedStringTable -> SharedStringTable -withSingleUnderline = withUnderline FontUnderlineSingle - -withDoubleUnderline :: SharedStringTable -> SharedStringTable -withDoubleUnderline = withUnderline FontUnderlineDouble - -withUnderline :: FontUnderline -> SharedStringTable -> SharedStringTable -withUnderline u (SharedStringTable [text, XlsxRichText [rich1, RichTextRun (Just props) val]]) = - let newprops = props & runPropertiesUnderline .~ Just u - in SharedStringTable [text, XlsxRichText [rich1, RichTextRun (Just newprops) val]] - -testSharedStringTable :: SharedStringTable -testSharedStringTable = SharedStringTable $ V.fromList items - where - items = [text, rich] - text = XlsxText "plain text" - rich = XlsxRichText [ RichTextRun Nothing "Just " - , RichTextRun (Just props) "example" ] - props = def & runPropertiesBold .~ Just True - & runPropertiesItalic .~ Just True - & runPropertiesSize .~ Just 10 - & runPropertiesFont .~ Just "Arial" - & runPropertiesFontFamily .~ Just FontFamilySwiss - -testSharedStringTableWithEmpty :: SharedStringTable -testSharedStringTableWithEmpty = - SharedStringTable $ V.fromList [XlsxText ""] - -testCommentTable :: CommentTable -testCommentTable = CommentTable $ M.fromList - [ (CellRef "D4", Comment (XlsxRichText rich) "Bob" True) - , (CellRef "A2", Comment (XlsxText "Some comment here") "CBR" True) ] - where - rich = [ RichTextRun - { _richTextRunProperties = - Just $ def & runPropertiesBold ?~ True - & runPropertiesCharset ?~ 1 - & runPropertiesColor ?~ def -- TODO: why not Nothing here? - & runPropertiesFont ?~ "Calibri" - & runPropertiesScheme ?~ FontSchemeMinor - & runPropertiesSize ?~ 8.0 - , _richTextRunText = "Bob:"} - , RichTextRun - { _richTextRunProperties = - Just $ def & runPropertiesCharset ?~ 1 - & runPropertiesColor ?~ def - & runPropertiesFont ?~ "Calibri" - & runPropertiesScheme ?~ FontSchemeMinor - & runPropertiesSize ?~ 8.0 - , _richTextRunText = "Why such high expense?"}] - -testStrings :: ByteString -testStrings = [r| - - - plain text - Just - example - -|] - -testStringsWithSingleUnderline :: ByteString -testStringsWithSingleUnderline = [r| - - - plain text - Just - example - -|] - -testStringsWithDoubleUnderline :: ByteString -testStringsWithDoubleUnderline = [r| - - - plain text - Just - example - -|] - -testStringsWithEmpty :: ByteString -testStringsWithEmpty = [r| - - - - -|] - -testComments :: ByteString -testComments = [r| - - - - Bob - CBR - - - - - - - - - - Bob: - - - - - - - Why such high expense? - - - - - Some comment here - - - -|] - -testCustomProperties :: CustomProperties -testCustomProperties = CustomProperties.fromList - [ ("testTextProp", VtLpwstr "test text property value") - , ("prop2", VtLpwstr "222") - , ("bool", VtBool False) - , ("prop333", VtInt 1) - , ("decimal", VtDecimal 1.234) ] - -testCustomPropertiesXml :: ByteString -testCustomPropertiesXml = [r| - - - - 222 - - - 1 - - - test text property value - - - 1.234 - - - false - - - - ZXhhbXBs - ZSBibG9i - IGNvbnRl - bnRz - - - -|] - -testFormattedResult :: Formatted -testFormattedResult = Formatted cm styleSheet merges - where - cm = M.fromList [ ((1, 1), cell11) - , ((1, 2), cell12) - , ((2, 5), cell25) ] - cell11 = Cell - { _cellStyle = Just 1 - , _cellValue = Just (CellText "text at A1") - , _cellComment = Nothing - , _cellFormula = Nothing } - cell12 = Cell - { _cellStyle = Just 2 - , _cellValue = Just (CellDouble 1.23) - , _cellComment = Nothing - , _cellFormula = Nothing } - cell25 = Cell - { _cellStyle = Just 3 - , _cellValue = Just (CellDouble 1.23456) - , _cellComment = Nothing - , _cellFormula = Nothing } - merges = [] - styleSheet = - minimalStyleSheet & styleSheetCellXfs %~ (++ [cellXf1, cellXf2, cellXf3]) - & styleSheetFonts %~ (++ [font1, font2]) - & styleSheetNumFmts .~ numFmts - nextFontId = length (minimalStyleSheet ^. styleSheetFonts) - cellXf1 = def - { _cellXfApplyFont = Just True - , _cellXfFontId = Just nextFontId } - font1 = def - { _fontName = Just "Calibri" - , _fontBold = Just True } - cellXf2 = def - { _cellXfApplyFont = Just True - , _cellXfFontId = Just (nextFontId + 1) - , _cellXfApplyNumberFormat = Just True - , _cellXfNumFmtId = Just 164 } - font2 = def - { _fontItalic = Just True } - cellXf3 = def - { _cellXfApplyNumberFormat = Just True - , _cellXfNumFmtId = Just 2 } - numFmts = M.fromList [(164, "0.0000")] - -testRunFormatted :: Formatted -testRunFormatted = formatted formattedCellMap minimalStyleSheet - where - formattedCellMap = flip execState def $ do - let font1 = def & fontBold ?~ True - & fontName ?~ "Calibri" - at (1, 1) ?= (def & formattedCell . cellValue ?~ CellText "text at A1" - & formattedFormat . formatFont ?~ font1) - at (1, 2) ?= (def & formattedCell . cellValue ?~ CellDouble 1.23 - & formattedFormat . formatFont . non def . fontItalic ?~ True - & formattedFormat . formatNumberFormat ?~ fmtDecimalsZeroes 4) - at (2, 5) ?= (def & formattedCell . cellValue ?~ CellDouble 1.23456 - & formattedFormat . formatNumberFormat ?~ StdNumberFormat Nf2Decimal) - -testFormatWorkbookResult :: Xlsx -testFormatWorkbookResult = def & xlSheets .~ sheets - & xlStyles .~ renderStyleSheet style - where - testCellMap1 = M.fromList [((1, 1), Cell { _cellStyle = Nothing - , _cellValue = Just (CellText "text at A1 Sheet1") - , _cellComment = Nothing - , _cellFormula = Nothing })] - testCellMap2 = M.fromList [((2, 3), Cell { _cellStyle = Just 1 - , _cellValue = Just (CellDouble 1.23456) - , _cellComment = Nothing - , _cellFormula = Nothing })] - sheets = [ ("Sheet1", def & wsCells .~ testCellMap1) - , ("Sheet2", def & wsCells .~ testCellMap2) - ] - style = minimalStyleSheet & styleSheetNumFmts .~ M.fromList [(164, "DD.MM.YYYY")] - & styleSheetCellXfs .~ [cellXf1, cellXf2] - cellXf1 = def - & cellXfBorderId .~ Just 0 - & cellXfFillId .~ Just 0 - & cellXfFontId .~ Just 0 - cellXf2 = def - { _cellXfApplyNumberFormat = Just True - , _cellXfNumFmtId = Just 164 } - -testFormatWorkbook :: Xlsx -testFormatWorkbook = formatWorkbook sheets minimalStyleSheet - where - sheetNames = ["Sheet1", "Sheet2"] - testFormattedCellMap1 = M.fromList [((1,1), (def & formattedCell . cellValue ?~ CellText "text at A1 Sheet1"))] - - testFormattedCellMap2 = M.fromList [((2,3), (def & formattedCell . cellValue ?~ CellDouble 1.23456 - & formattedFormat . formatNumberFormat ?~ (UserNumberFormat "DD.MM.YYYY")))] - sheets = zip sheetNames [testFormattedCellMap1, testFormattedCellMap2] - -testCondFormattedResult :: CondFormatted -testCondFormattedResult = CondFormatted styleSheet formattings - where - styleSheet = - minimalStyleSheet & styleSheetDxfs .~ dxfs - dxfs = [ def & dxfFont ?~ (def & fontUnderline ?~ FontUnderlineSingle) - , def & dxfFont ?~ (def & fontStrikeThrough ?~ True) - , def & dxfFont ?~ (def & fontBold ?~ True) ] - formattings = M.fromList [ (SqRef [CellRef "A1:A2", CellRef "B2:B3"], [cfRule1, cfRule2]) - , (SqRef [CellRef "C3:E10"], [cfRule1]) - , (SqRef [CellRef "F1:G10"], [cfRule3]) ] - cfRule1 = CfRule - { _cfrCondition = ContainsBlanks - , _cfrDxfId = Just 0 - , _cfrPriority = 1 - , _cfrStopIfTrue = Nothing } - cfRule2 = CfRule - { _cfrCondition = BeginsWith "foo" - , _cfrDxfId = Just 1 - , _cfrPriority = 1 - , _cfrStopIfTrue = Nothing } - cfRule3 = CfRule - { _cfrCondition = CellIs (OpGreaterThan (Formula "A1")) - , _cfrDxfId = Just 2 - , _cfrPriority = 1 - , _cfrStopIfTrue = Nothing } - -testFormattedCells :: Map (Int, Int) FormattedCell -testFormattedCells = flip execState def $ do - at (1,1) ?= (def & formattedRowSpan .~ 5 - & formattedColSpan .~ 5 - & formattedFormat . formatBorder . non def . borderTop . - non def . borderStyleLine ?~ LineStyleDashed - & formattedFormat . formatBorder . non def . borderBottom . - non def . borderStyleLine ?~ LineStyleDashed) - at (10,2) ?= (def & formattedFormat . formatFont . non def . fontBold ?~ True) - -testRunCondFormatted :: CondFormatted -testRunCondFormatted = conditionallyFormatted condFmts minimalStyleSheet - where - condFmts = flip execState def $ do - let cfRule1 = def & condfmtCondition .~ ContainsBlanks - & condfmtDxf . dxfFont . non def . fontUnderline ?~ FontUnderlineSingle - cfRule2 = def & condfmtCondition .~ BeginsWith "foo" - & condfmtDxf . dxfFont . non def . fontStrikeThrough ?~ True - cfRule3 = def & condfmtCondition .~ CellIs (OpGreaterThan (Formula "A1")) - & condfmtDxf . dxfFont . non def . fontBold ?~ True - at (CellRef "A1:A2") ?= [cfRule1, cfRule2] - at (CellRef "B2:B3") ?= [cfRule1, cfRule2] - at (CellRef "C3:E10") ?= [cfRule1] - at (CellRef "F1:G10") ?= [cfRule3] - -validations :: Map SqRef DataValidation -validations = M.fromList - [ ( SqRef [CellRef "A1"], def - ) - , ( SqRef [CellRef "A1", CellRef "B2:C3"], def - { _dvAllowBlank = True - , _dvError = Just "incorrect data" - , _dvErrorStyle = ErrorStyleInformation - , _dvErrorTitle = Just "error title" - , _dvPrompt = Just "enter data" - , _dvPromptTitle = Just "prompt title" - , _dvShowDropDown = True - , _dvShowErrorMessage = True - , _dvShowInputMessage = True - , _dvValidationType = ValidationTypeList ["aaaa","bbbb","cccc"] - } - ) - , ( SqRef [CellRef "A6", CellRef "I2"], def - { _dvAllowBlank = False - , _dvError = Just "aaa" - , _dvErrorStyle = ErrorStyleWarning - , _dvErrorTitle = Just "bbb" - , _dvPrompt = Just "ccc" - , _dvPromptTitle = Just "ddd" - , _dvShowDropDown = False - , _dvShowErrorMessage = False - , _dvShowInputMessage = False - , _dvValidationType = ValidationTypeDecimal $ ValGreaterThan $ Formula "10" - } - ) - , ( SqRef [CellRef "A7"], def - { _dvAllowBlank = False - , _dvError = Just "aaa" - , _dvErrorStyle = ErrorStyleStop - , _dvErrorTitle = Just "bbb" - , _dvPrompt = Just "ccc" - , _dvPromptTitle = Just "ddd" - , _dvShowDropDown = False - , _dvShowErrorMessage = False - , _dvShowInputMessage = False - , _dvValidationType = ValidationTypeWhole $ ValNotBetween (Formula "10") (Formula "12") - } - ) + , StreamTests.tests ] diff --git a/test/StreamTests.hs b/test/StreamTests.hs new file mode 100644 index 00000000..1ddee11a --- /dev/null +++ b/test/StreamTests.hs @@ -0,0 +1,208 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE CPP #-} + +module StreamTests + ( tests + ) where + +#ifdef USE_MICROLENS + +import Test.Tasty (TestName, TestTree, testGroup) +tests :: TestTree +tests = testGroup + "I stubbed out the tests module for microlens \ + because it doesn't understand setOf. \ + Volunteers are welcome to fix this!" + [] +#else + +import Control.Exception +import Codec.Xlsx +import Codec.Xlsx.Parser.Stream +import Codec.Xlsx.Types.Common +import Codec.Xlsx.Types.Internal.SharedStringTable +import Conduit ((.|)) +import qualified Conduit as C +import Control.Exception (bracket) +import Control.Lens hiding (indexed) +import Data.Set.Lens +import Data.ByteString.Lazy (ByteString) +import qualified Data.ByteString.Lazy as LB +import qualified Data.ByteString as BS +import Data.Map (Map) +import qualified Data.Map as M +import qualified Data.Map as Map +import qualified Data.IntMap.Strict as IM +import Data.Maybe (mapMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text as Text +import Data.Vector (Vector, indexed, toList) +import Diff +import System.Directory (getTemporaryDirectory) +import System.FilePath.Posix +import Test.Tasty (TestName, TestTree, testGroup) +import Test.Tasty.HUnit (testCase) +import TestXlsx +import Text.RawString.QQ +import Text.XML +import qualified Codec.Xlsx.Writer.Stream as SW +import qualified Codec.Xlsx.Writer.Internal.Stream as SW +import Control.Monad.State.Lazy +import Test.Tasty.SmallCheck +import qualified Data.Set as Set +import Data.Set (Set) +import Text.Printf +import Debug.Trace +import Control.DeepSeq +import Data.Conduit +import Codec.Xlsx.Formatted + +toBs = LB.toStrict . fromXlsx testTime + +tests :: TestTree +tests = + testGroup "Stream tests" + [ + testGroup "Writer/shared strings" + [ testProperty "Input same as the output" sharedStringInputSameAsOutput + , testProperty "Set of input texts is same as map length" sharedStringInputTextsIsSameAsMapLength + , testProperty "Set of input texts is as value set length" sharedStringInputTextsIsSameAsValueSetLength + ], + + testGroup "Reader/Writer" + [ testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbook + , testCase "Write as stream, see if memory based implementation can read it" $ readWrite simpleWorkbookRow + , testCase "Test a small workbook which has a fullblown sqaure" $ readWrite smallWorkbook + , testCase "Test a big workbook as a full square which caused issues with zipstream \ + The buffer of zipstream maybe 1kb, this workbook is big enough \ + to be more than that. \ + So if this encodes/decodes we know we can handle those sizes. \ + In some older version the bytestring got cut off resulting in a corrupt xlsx file" + $ readWrite bigWorkbook + -- , testCase "Write as stream, see if memory based implementation can read it" $ readWrite testXlsx + -- TODO forall SheetItem write that can be read + ], + + testGroup "Reader/inline strings" + [ testCase "Can parse row with inline strings" inlineStringsAreParsed + ] + ] + +readWrite :: Xlsx -> IO () +readWrite input = do + BS.writeFile "testinput.xlsx" (toBs input) + items <- fmap (toListOf (traversed . si_row)) $ runXlsxM "testinput.xlsx" $ collectItems $ makeIndex 1 + bs <- runConduitRes $ void (SW.writeXlsx SW.defaultSettings $ C.yieldMany items) .| C.foldC + case toXlsxEither $ LB.fromStrict bs of + Right result -> + input @==? result + Left x -> do + throwIO x + +-- test if the input text is also the result (a property we use for convenience) +sharedStringInputSameAsOutput :: Text -> Either String String +sharedStringInputSameAsOutput someText = + if someText == out then Right msg else Left msg + where + out = fst $ evalState (SW.upsertSharedString someText) SW.initialSharedString + msg = printf "'%s' = '%s'" (Text.unpack out) (Text.unpack someText) + +-- test if unique strings actually get set in the map as keys +sharedStringInputTextsIsSameAsMapLength :: [Text] -> Bool +sharedStringInputTextsIsSameAsMapLength someTexts = + length result == length unqTexts + where + result :: Map Text Int + result = view SW.string_map $ traverse SW.upsertSharedString someTexts `execState` SW.initialSharedString + unqTexts :: Set Text + unqTexts = Set.fromList someTexts + +-- test for every unique string we get a unique number +sharedStringInputTextsIsSameAsValueSetLength :: [Text] -> Bool +sharedStringInputTextsIsSameAsValueSetLength someTexts = + length result == length unqTexts + where + result :: Set Int + result = setOf (SW.string_map . traversed) $ traverse SW.upsertSharedString someTexts `execState` SW.initialSharedString + unqTexts :: Set Text + unqTexts = Set.fromList someTexts + +-- can we do xx +simpleWorkbook :: Xlsx +simpleWorkbook = set xlSheets sheets def + where + sheets = [("Sheet1" , toWs [((1,1), a1), ((1,2), cellValue ?~ CellText "text at B1 Sheet1" $ def)])] + +a1 :: Cell +a1 = cellValue ?~ CellText "text at A1 Sheet1" $ cellStyle ?~ 1 $ def + +-- can we do x +-- x +simpleWorkbookRow :: Xlsx +simpleWorkbookRow = set xlSheets sheets def + where + sheets = [("Sheet1" , toWs [((1,1), a1), ((2,1), cellValue ?~ CellText "text at A2 Sheet1" $ def)])] + + +tshow :: Show a => a -> Text +tshow = Text.pack . show + +toWs :: [((Int,Int), Cell)] -> Worksheet +toWs x = set wsCells (M.fromList x) def + +-- can we do xxx +-- xxx +-- . +-- . +smallWorkbook :: Xlsx +smallWorkbook = set xlSheets sheets def + where + sheets = [("Sheet1" , toWs $ [1..2] >>= \row -> + [((row,1), a1) + , ((row,2), def & cellValue ?~ CellText ("text at B"<> tshow row <> " Sheet1")) + , ((row,3), def & cellValue ?~ CellText "text at C1 Sheet1") + , ((row,4), def & cellValue ?~ CellDouble (0.2 + 0.1)) + , ((row,5), def & cellValue ?~ CellBool False) + ] + )] +bigWorkbook :: Xlsx +bigWorkbook = set xlSheets sheets def + where + sheets = [("Sheet1" , toWs $ [1..512] >>= \row -> + [((row,1), a1) + ,((row,2), def & cellValue ?~ CellText ("text at B"<> tshow row <> " Sheet1")) + ,((row,3), def & cellValue ?~ CellText "text at C1 Sheet1") + ] + )] + +inlineStringsAreParsed :: IO () +inlineStringsAreParsed = do + items <- runXlsxM "data/inline-strings.xlsx" $ collectItems $ makeIndex 1 + let expected = + [ IM.fromList + [ ( 1, + Cell + { _cellStyle = Nothing, + _cellValue = Just (CellText "My Inline String"), + _cellComment = Nothing, + _cellFormula = Nothing + } + ), + ( 2, + Cell + { _cellStyle = Nothing, + _cellValue = Just (CellText "two"), + _cellComment = Nothing, + _cellFormula = Nothing + } + ) + ] + ] + expected @==? (items ^.. traversed . si_row . ri_cell_row) + +#endif diff --git a/test/TestXlsx.hs b/test/TestXlsx.hs new file mode 100644 index 00000000..fb210cf0 --- /dev/null +++ b/test/TestXlsx.hs @@ -0,0 +1,568 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE CPP #-} + +module TestXlsx where + +#ifdef USE_MICROLENS +import Lens.Micro.Platform +#else +import Control.Lens +#endif +import Control.Monad.State.Lazy +import Data.ByteString.Lazy (ByteString) +import qualified Data.ByteString.Lazy as LB +import Data.Map (Map) +import qualified Data.Map as M +import Data.Time.Clock.POSIX (POSIXTime) +import qualified Data.Vector as V +import Text.RawString.QQ +import Text.XML + +import Test.Tasty (defaultMain, testGroup) +import Test.Tasty.HUnit (testCase) + +import Test.Tasty.HUnit ((@=?)) + +import Codec.Xlsx +import Codec.Xlsx.Formatted +import Codec.Xlsx.Types.Internal +import Codec.Xlsx.Types.Internal.CommentTable +import Codec.Xlsx.Types.Internal.CustomProperties + as CustomProperties +import Codec.Xlsx.Types.Internal.SharedStringTable + +import AutoFilterTests +import Common +import CommonTests +import CondFmtTests +import Diff +import PivotTableTests +import DrawingTests + +testXlsx :: Xlsx +testXlsx = Xlsx sheets minimalStyles definedNames customProperties DateBase1904 + where + sheets = + [("List1", sheet1), ("Another sheet", sheet2), ("with pivot table", pvSheet)] + sheet1 = Worksheet cols rowProps testCellMap1 drawing ranges + sheetViews pageSetup cFormatting validations [] (Just autoFilter) + tables (Just protection) sharedFormulas + sharedFormulas = + M.fromList + [ (SharedFormulaIndex 0, SharedFormulaOptions (CellRef "A5:C5") (Formula "A4")) + , (SharedFormulaIndex 1, SharedFormulaOptions (CellRef "B6:C6") (Formula "B3+12")) + ] + autoFilter = def & afRef ?~ CellRef "A1:E10" + & afFilterColumns .~ fCols + fCols = M.fromList [ (1, Filters DontFilterByBlank + [FilterValue "a", FilterValue "b",FilterValue "ZZZ"]) + , (2, CustomFiltersAnd (CustomFilter FltrGreaterThanOrEqual "0") + (CustomFilter FltrLessThan "42"))] + tables = + [ Table + { tblName = Just "Table1" + , tblDisplayName = "Table1" + , tblRef = CellRef "A3" + , tblColumns = [TableColumn "another text"] + , tblAutoFilter = Just (def & afRef ?~ CellRef "A3") + } + ] + protection = + fullSheetProtection + { _sprScenarios = False + , _sprLegacyPassword = Just $ legacyPassword "hard password" + } + sheet2 = def & wsCells .~ testCellMap2 + pvSheet = sheetWithPvCells & wsPivotTables .~ [testPivotTable] + sheetWithPvCells = def & wsCells .~ testPivotSrcCells + rowProps = M.fromList [(1, RowProps { rowHeight = Just (CustomHeight 50) + , rowStyle = Just 3 + , rowHidden = False + })] + cols = [ColumnsProperties 1 10 (Just 15) (Just 1) False False False] + drawing = Just $ testDrawing { _xdrAnchors = map resolve $ _xdrAnchors testDrawing } + resolve :: Anchor RefId RefId -> Anchor FileInfo ChartSpace + resolve Anchor {..} = + let obj = + case _anchObject of + Picture {..} -> + let blipFill = (_picBlipFill & bfpImageInfo ?~ fileInfo) + in Picture + { _picMacro = _picMacro + , _picPublished = _picPublished + , _picNonVisual = _picNonVisual + , _picBlipFill = blipFill + , _picShapeProperties = _picShapeProperties + } + Graphic nv _ tr -> + Graphic nv testLineChartSpace tr + in Anchor + { _anchAnchoring = _anchAnchoring + , _anchObject = obj + , _anchClientData = _anchClientData + } + fileInfo = FileInfo "dummy.png" "image/png" "fake contents" + ranges = [mkRange (1,1) (1,2), mkRange (2,2) (10, 5)] + minimalStyles = renderStyleSheet minimalStyleSheet + definedNames = DefinedNames [("SampleName", Nothing, "A10:A20")] + sheetViews = Just [sheetView1, sheetView2] + sheetView1 = def & sheetViewRightToLeft ?~ True + & sheetViewTopLeftCell ?~ CellRef "B5" + sheetView2 = def & sheetViewType ?~ SheetViewTypePageBreakPreview + & sheetViewWorkbookViewId .~ 5 + & sheetViewSelection .~ [ def & selectionActiveCell ?~ CellRef "C2" + & selectionPane ?~ PaneTypeBottomRight + , def & selectionActiveCellId ?~ 1 + & selectionSqref ?~ SqRef [ CellRef "A3:A10" + , CellRef "B1:G3"] + ] + pageSetup = Just $ def & pageSetupBlackAndWhite ?~ True + & pageSetupCopies ?~ 2 + & pageSetupErrors ?~ PrintErrorsDash + & pageSetupPaperSize ?~ PaperA4 + customProperties = M.fromList [("some_prop", VtInt 42)] + cFormatting = M.fromList [(SqRef [CellRef "A1:B3"], rules1), (SqRef [CellRef "C1:C10"], rules2)] + cfRule c d = CfRule { _cfrCondition = c + , _cfrDxfId = Just d + , _cfrPriority = topCfPriority + , _cfrStopIfTrue = Nothing + } + rules1 = [ cfRule ContainsBlanks 1 + , cfRule (ContainsText "foo") 2 + , cfRule (CellIs (OpBetween (Formula "A1") (Formula "B10"))) 3 + ] + rules2 = [ cfRule ContainsErrors 3 ] + +testCellMap1 :: CellMap +testCellMap1 = M.fromList [ ((1, 2), cd1_2), ((1, 5), cd1_5), ((1, 10), cd1_10) + , ((3, 1), cd3_1), ((3, 2), cd3_2), ((3, 3), cd3_3), ((3, 7), cd3_7) + , ((4, 1), cd4_1), ((4, 2), cd4_2), ((4, 3), cd4_3) + , ((5, 1), cd5_1), ((5, 2), cd5_2), ((5, 3), cd5_3) + , ((6, 2), cd6_2), ((6, 3), cd6_3) + ] + where + cd v = def {_cellValue=Just v} + cd1_2 = cd (CellText "just a text, fließen, русский <> и & \"in quotes\"") + cd1_5 = cd (CellDouble 42.4567) + cd1_10 = cd (CellText "") + cd3_1 = cd (CellText "another text") + cd3_2 = def -- shouldn't it be skipped? + cd3_3 = def & cellValue ?~ CellError ErrorDiv0 + & cellFormula ?~ simpleCellFormula "1/0" + cd3_7 = cd (CellBool True) + cd4_1 = cd (CellDouble 1) + cd4_2 = cd (CellDouble 123456789012345) + cd4_3 = (cd (CellDouble (1+2))) { _cellFormula = + Just $ simpleCellFormula "A4+B4<>11" + } + cd5_1 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 0) + cd5_2 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 0) + cd5_3 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 0) + cd6_2 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 1) + cd6_3 = def & cellFormula ?~ sharedFormulaByIndex (SharedFormulaIndex 1) + +testCellMap2 :: CellMap +testCellMap2 = M.fromList [ ((1, 2), def & cellValue ?~ CellText "something here") + , ((3, 5), def & cellValue ?~ CellDouble 123.456) + , ((2, 4), + def & cellValue ?~ CellText "value" + & cellComment ?~ comment1 + ) + , ((10, 7), + def & cellValue ?~ CellText "value" + & cellComment ?~ comment2 + ) + , ((11, 4), def & cellComment ?~ comment3) + ] + where + comment1 = Comment (XlsxText "simple comment") "bob" True + comment2 = Comment (XlsxRichText [rich1, rich2]) "alice" False + comment3 = Comment (XlsxText "comment for an empty cell") "bob" True + rich1 = def & richTextRunText.~ "Look ma!" + & richTextRunProperties ?~ ( + def & runPropertiesBold ?~ True + & runPropertiesFont ?~ "Tahoma") + rich2 = def & richTextRunText .~ "It's blue!" + & richTextRunProperties ?~ ( + def & runPropertiesItalic ?~ True + & runPropertiesColor ?~ (def & colorARGB ?~ "FF000080")) + +testTime :: POSIXTime +testTime = 123 + +fromRight :: Show a => Either a b -> b +fromRight (Right b) = b +fromRight (Left x) = error $ "Right _ was expected but Left " ++ show x ++ " found" + +testStyleSheet :: StyleSheet +testStyleSheet = minimalStyleSheet & styleSheetDxfs .~ [dxf1, dxf2, dxf3] + & styleSheetNumFmts .~ M.fromList [(164, "0.000")] + & styleSheetCellXfs %~ (++ [cellXf1, cellXf2]) + where + dxf1 = def & dxfFont ?~ (def & fontBold ?~ True + & fontSize ?~ 12) + dxf2 = def & dxfFill ?~ (def & fillPattern ?~ (def & fillPatternBgColor ?~ red)) + dxf3 = def & dxfNumFmt ?~ NumFmt 164 "0.000" + red = def & colorARGB ?~ "FFFF0000" + cellXf1 = def + { _cellXfApplyNumberFormat = Just True + , _cellXfNumFmtId = Just 2 } + cellXf2 = def + { _cellXfApplyNumberFormat = Just True + , _cellXfNumFmtId = Just 164 } + + +withSingleUnderline :: SharedStringTable -> SharedStringTable +withSingleUnderline = withUnderline FontUnderlineSingle + +withDoubleUnderline :: SharedStringTable -> SharedStringTable +withDoubleUnderline = withUnderline FontUnderlineDouble + +withUnderline :: FontUnderline -> SharedStringTable -> SharedStringTable +withUnderline u (SharedStringTable [text, XlsxRichText [rich1, RichTextRun (Just props) val]]) = + let newprops = props & runPropertiesUnderline .~ Just u + in SharedStringTable [text, XlsxRichText [rich1, RichTextRun (Just newprops) val]] + +testSharedStringTable :: SharedStringTable +testSharedStringTable = SharedStringTable $ V.fromList items + where + items = [text, rich] + text = XlsxText "plain text" + rich = XlsxRichText [ RichTextRun Nothing "Just " + , RichTextRun (Just props) "example" ] + props = def & runPropertiesBold .~ Just True + & runPropertiesItalic .~ Just True + & runPropertiesSize .~ Just 10 + & runPropertiesFont .~ Just "Arial" + & runPropertiesFontFamily .~ Just FontFamilySwiss + +testSharedStringTableWithEmpty :: SharedStringTable +testSharedStringTableWithEmpty = + SharedStringTable $ V.fromList [XlsxText ""] + +testCommentTable :: CommentTable +testCommentTable = CommentTable $ M.fromList + [ (CellRef "D4", Comment (XlsxRichText rich) "Bob" True) + , (CellRef "A2", Comment (XlsxText "Some comment here") "CBR" True) ] + where + rich = [ RichTextRun + { _richTextRunProperties = + Just $ def & runPropertiesBold ?~ True + & runPropertiesCharset ?~ 1 + & runPropertiesColor ?~ def -- TODO: why not Nothing here? + & runPropertiesFont ?~ "Calibri" + & runPropertiesScheme ?~ FontSchemeMinor + & runPropertiesSize ?~ 8.0 + , _richTextRunText = "Bob:"} + , RichTextRun + { _richTextRunProperties = + Just $ def & runPropertiesCharset ?~ 1 + & runPropertiesColor ?~ def + & runPropertiesFont ?~ "Calibri" + & runPropertiesScheme ?~ FontSchemeMinor + & runPropertiesSize ?~ 8.0 + , _richTextRunText = "Why such high expense?"}] + +testStrings :: ByteString +testStrings = [r| + + + plain text + Just + example + +|] + +testStringsWithSingleUnderline :: ByteString +testStringsWithSingleUnderline = [r| + + + plain text + Just + example + +|] + +testStringsWithDoubleUnderline :: ByteString +testStringsWithDoubleUnderline = [r| + + + plain text + Just + example + +|] + +testStringsWithEmpty :: ByteString +testStringsWithEmpty = [r| + + + + +|] + +testComments :: ByteString +testComments = [r| + + + + Bob + CBR + + + + + + + + + + Bob: + + + + + + + Why such high expense? + + + + + Some comment here + + + +|] + +testCustomProperties :: CustomProperties +testCustomProperties = CustomProperties.fromList + [ ("testTextProp", VtLpwstr "test text property value") + , ("prop2", VtLpwstr "222") + , ("bool", VtBool False) + , ("prop333", VtInt 1) + , ("decimal", VtDecimal 1.234) ] + +testCustomPropertiesXml :: ByteString +testCustomPropertiesXml = [r| + + + + 222 + + + 1 + + + test text property value + + + 1.234 + + + false + + + + ZXhhbXBs + ZSBibG9i + IGNvbnRl + bnRz + + + +|] + +testFormattedResult :: Formatted +testFormattedResult = Formatted cm styleSheet merges + where + cm = M.fromList [ ((1, 1), cell11) + , ((1, 2), cell12) + , ((2, 5), cell25) ] + cell11 = Cell + { _cellStyle = Just 1 + , _cellValue = Just (CellText "text at A1") + , _cellComment = Nothing + , _cellFormula = Nothing } + cell12 = Cell + { _cellStyle = Just 2 + , _cellValue = Just (CellDouble 1.23) + , _cellComment = Nothing + , _cellFormula = Nothing } + cell25 = Cell + { _cellStyle = Just 3 + , _cellValue = Just (CellDouble 1.23456) + , _cellComment = Nothing + , _cellFormula = Nothing } + merges = [] + styleSheet = + minimalStyleSheet & styleSheetCellXfs %~ (++ [cellXf1, cellXf2, cellXf3]) + & styleSheetFonts %~ (++ [font1, font2]) + & styleSheetNumFmts .~ numFmts + nextFontId = length (minimalStyleSheet ^. styleSheetFonts) + cellXf1 = def + { _cellXfApplyFont = Just True + , _cellXfFontId = Just nextFontId } + font1 = def + { _fontName = Just "Calibri" + , _fontBold = Just True } + cellXf2 = def + { _cellXfApplyFont = Just True + , _cellXfFontId = Just (nextFontId + 1) + , _cellXfApplyNumberFormat = Just True + , _cellXfNumFmtId = Just 164 } + font2 = def + { _fontItalic = Just True } + cellXf3 = def + { _cellXfApplyNumberFormat = Just True + , _cellXfNumFmtId = Just 2 } + numFmts = M.fromList [(164, "0.0000")] + +testRunFormatted :: Formatted +testRunFormatted = formatted formattedCellMap minimalStyleSheet + where + formattedCellMap = flip execState def $ do + let font1 = def & fontBold ?~ True + & fontName ?~ "Calibri" + at (1, 1) ?= (def & formattedCell . cellValue ?~ CellText "text at A1" + & formattedFormat . formatFont ?~ font1) + at (1, 2) ?= (def & formattedCell . cellValue ?~ CellDouble 1.23 + & formattedFormat . formatFont . non def . fontItalic ?~ True + & formattedFormat . formatNumberFormat ?~ fmtDecimalsZeroes 4) + at (2, 5) ?= (def & formattedCell . cellValue ?~ CellDouble 1.23456 + & formattedFormat . formatNumberFormat ?~ StdNumberFormat Nf2Decimal) + +testFormatWorkbookResult :: Xlsx +testFormatWorkbookResult = def & xlSheets .~ sheets + & xlStyles .~ renderStyleSheet style + where + testCellMap1 = M.fromList [((1, 1), Cell { _cellStyle = Nothing + , _cellValue = Just (CellText "text at A1 Sheet1") + , _cellComment = Nothing + , _cellFormula = Nothing })] + testCellMap2 = M.fromList [((2, 3), Cell { _cellStyle = Just 1 + , _cellValue = Just (CellDouble 1.23456) + , _cellComment = Nothing + , _cellFormula = Nothing })] + sheets = [ ("Sheet1", def & wsCells .~ testCellMap1) + , ("Sheet2", def & wsCells .~ testCellMap2) + ] + style = minimalStyleSheet & styleSheetNumFmts .~ M.fromList [(164, "DD.MM.YYYY")] + & styleSheetCellXfs .~ [cellXf1, cellXf2] + cellXf1 = def + & cellXfBorderId .~ Just 0 + & cellXfFillId .~ Just 0 + & cellXfFontId .~ Just 0 + cellXf2 = def + { _cellXfApplyNumberFormat = Just True + , _cellXfNumFmtId = Just 164 } + +testFormatWorkbook :: Xlsx +testFormatWorkbook = formatWorkbook sheets minimalStyleSheet + where + sheetNames = ["Sheet1", "Sheet2"] + testFormattedCellMap1 = M.fromList [((1,1), (def & formattedCell . cellValue ?~ CellText "text at A1 Sheet1"))] + + testFormattedCellMap2 = M.fromList [((2,3), (def & formattedCell . cellValue ?~ CellDouble 1.23456 + & formattedFormat . formatNumberFormat ?~ (UserNumberFormat "DD.MM.YYYY")))] + sheets = zip sheetNames [testFormattedCellMap1, testFormattedCellMap2] + +testCondFormattedResult :: CondFormatted +testCondFormattedResult = CondFormatted styleSheet formattings + where + styleSheet = + minimalStyleSheet & styleSheetDxfs .~ dxfs + dxfs = [ def & dxfFont ?~ (def & fontUnderline ?~ FontUnderlineSingle) + , def & dxfFont ?~ (def & fontStrikeThrough ?~ True) + , def & dxfFont ?~ (def & fontBold ?~ True) ] + formattings = M.fromList [ (SqRef [CellRef "A1:A2", CellRef "B2:B3"], [cfRule1, cfRule2]) + , (SqRef [CellRef "C3:E10"], [cfRule1]) + , (SqRef [CellRef "F1:G10"], [cfRule3]) ] + cfRule1 = CfRule + { _cfrCondition = ContainsBlanks + , _cfrDxfId = Just 0 + , _cfrPriority = 1 + , _cfrStopIfTrue = Nothing } + cfRule2 = CfRule + { _cfrCondition = BeginsWith "foo" + , _cfrDxfId = Just 1 + , _cfrPriority = 1 + , _cfrStopIfTrue = Nothing } + cfRule3 = CfRule + { _cfrCondition = CellIs (OpGreaterThan (Formula "A1")) + , _cfrDxfId = Just 2 + , _cfrPriority = 1 + , _cfrStopIfTrue = Nothing } + +testFormattedCells :: Map (Int, Int) FormattedCell +testFormattedCells = flip execState def $ do + at (1,1) ?= (def & formattedRowSpan .~ 5 + & formattedColSpan .~ 5 + & formattedFormat . formatBorder . non def . borderTop . + non def . borderStyleLine ?~ LineStyleDashed + & formattedFormat . formatBorder . non def . borderBottom . + non def . borderStyleLine ?~ LineStyleDashed) + at (10,2) ?= (def & formattedFormat . formatFont . non def . fontBold ?~ True) + +testRunCondFormatted :: CondFormatted +testRunCondFormatted = conditionallyFormatted condFmts minimalStyleSheet + where + condFmts = flip execState def $ do + let cfRule1 = def & condfmtCondition .~ ContainsBlanks + & condfmtDxf . dxfFont . non def . fontUnderline ?~ FontUnderlineSingle + cfRule2 = def & condfmtCondition .~ BeginsWith "foo" + & condfmtDxf . dxfFont . non def . fontStrikeThrough ?~ True + cfRule3 = def & condfmtCondition .~ CellIs (OpGreaterThan (Formula "A1")) + & condfmtDxf . dxfFont . non def . fontBold ?~ True + at (CellRef "A1:A2") ?= [cfRule1, cfRule2] + at (CellRef "B2:B3") ?= [cfRule1, cfRule2] + at (CellRef "C3:E10") ?= [cfRule1] + at (CellRef "F1:G10") ?= [cfRule3] + +validations :: Map SqRef DataValidation +validations = M.fromList + [ ( SqRef [CellRef "A1"], def + ) + , ( SqRef [CellRef "A1", CellRef "B2:C3"], def + { _dvAllowBlank = True + , _dvError = Just "incorrect data" + , _dvErrorStyle = ErrorStyleInformation + , _dvErrorTitle = Just "error title" + , _dvPrompt = Just "enter data" + , _dvPromptTitle = Just "prompt title" + , _dvShowDropDown = True + , _dvShowErrorMessage = True + , _dvShowInputMessage = True + , _dvValidationType = ValidationTypeList ["aaaa","bbbb","cccc"] + } + ) + , ( SqRef [CellRef "A6", CellRef "I2"], def + { _dvAllowBlank = False + , _dvError = Just "aaa" + , _dvErrorStyle = ErrorStyleWarning + , _dvErrorTitle = Just "bbb" + , _dvPrompt = Just "ccc" + , _dvPromptTitle = Just "ddd" + , _dvShowDropDown = False + , _dvShowErrorMessage = False + , _dvShowInputMessage = False + , _dvValidationType = ValidationTypeDecimal $ ValGreaterThan $ Formula "10" + } + ) + , ( SqRef [CellRef "A7"], def + { _dvAllowBlank = False + , _dvError = Just "aaa" + , _dvErrorStyle = ErrorStyleStop + , _dvErrorTitle = Just "bbb" + , _dvPrompt = Just "ccc" + , _dvPromptTitle = Just "ddd" + , _dvShowDropDown = False + , _dvShowErrorMessage = False + , _dvShowInputMessage = False + , _dvValidationType = ValidationTypeWhole $ ValNotBetween (Formula "10") (Formula "12") + } + ) + ] diff --git a/xlsx.cabal b/xlsx.cabal index a7a1106b..c7e66c1c 100644 --- a/xlsx.cabal +++ b/xlsx.cabal @@ -76,8 +76,18 @@ Library , Codec.Xlsx.Writer , Codec.Xlsx.Writer.Internal , Codec.Xlsx.Writer.Internal.PivotTable + , Codec.Xlsx.Parser.Stream + , Codec.Xlsx.Writer.Stream + , Codec.Xlsx.Writer.Internal.Stream + + -- The only function it exports is also hidden by the upstream library: https://github.com/the-real-blackh/hexpat/blob/master/Text/XML/Expat/SAX.hs#L227 + -- We could expose it but then this function is in the xlsx API for a long time. + -- It be better to expose it in the upstream library instead I think. It was copied here so the parser can use it. + Other-modules: Codec.Xlsx.Parser.Stream.HexpatInternal + , Codec.Xlsx.Parser.Internal.Memoize Build-depends: base >= 4.9.0.0 && < 5.0 + , attoparsec , base64-bytestring , binary-search , bytestring >= 0.10.8.0 @@ -85,9 +95,11 @@ Library , containers >= 0.5.0.0 , data-default , deepseq >= 1.4 + , dlist , errors , extra , filepath + , hexpat , mtl >= 2.1 , network-uri , old-locale >= 1.0.0.5 @@ -100,11 +112,20 @@ Library , xml-conduit >= 1.1.0 , zip-archive >= 0.2 , zlib >= 0.5.4.0 + , zip + , zip-stream >= 0.2.0.1 + , xml-types + , exceptions + , transformers-base + , monad-control if flag(microlens) Build-depends: microlens >= 0.4 && < 0.5 , microlens-mtl , microlens-ghc , microlens-th + , profunctors + , microlens-platform + , indexed-traversable cpp-options: -DUSE_MICROLENS else Build-depends: lens >= 3.8 && < 5.1 @@ -130,11 +151,14 @@ test-suite data-test , Diff , DrawingTests , PivotTableTests + , StreamTests , Test.SmallCheck.Series.Instances + , TestXlsx Build-Depends: base , bytestring , containers , Diff >= 0.3.0 + , directory , groom , mtl , raw-strings-qq @@ -147,9 +171,14 @@ test-suite data-test , vector , xlsx , xml-conduit >= 1.1.0 + , conduit + , filepath + , deepseq if flag(microlens) Build-depends: microlens >= 0.4 && < 0.5 , microlens-mtl + , microlens-platform + , microlens-th cpp-options: -DUSE_MICROLENS else Build-depends: lens >= 3.8 && < 5.1 @@ -167,4 +196,7 @@ benchmark bench , bytestring , criterion , xlsx + , deepseq + , conduit + , lens default-language: Haskell2010