From 9807c81a2d99358ef4caadf947ca3fe6a1736df3 Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Thu, 2 Jan 2025 14:05:54 +0100 Subject: [PATCH 01/10] started to reorganize the code to find empty colums in rectify --- src/Poseidon/CLI/Rectify.hs | 15 +++++++++- src/Poseidon/CLI/Survey.hs | 59 +++++++------------------------------ src/Poseidon/Janno.hs | 48 +++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 50 deletions(-) diff --git a/src/Poseidon/CLI/Rectify.hs b/src/Poseidon/CLI/Rectify.hs index f1b9ec40..6ab6e757 100644 --- a/src/Poseidon/CLI/Rectify.hs +++ b/src/Poseidon/CLI/Rectify.hs @@ -19,6 +19,7 @@ import Poseidon.Utils (PoseidonIO, getChecksum, logDebug, logInfo) import Poseidon.Version (VersionComponent (..), updateThreeComponentVersion) +--import Poseidon.Janno (JannoRows) import Control.DeepSeq ((<$!!>)) import Control.Monad.IO.Class (MonadIO, liftIO) @@ -28,6 +29,7 @@ import Data.Time (UTCTime (..), getCurrentTime) import Data.Version (Version (..), makeVersion, showVersion) import System.Directory (doesFileExist, removeFile) import System.FilePath (()) +--import Data.Function ((&)) data RectifyOptions = RectifyOptions { _rectifyBaseDirs :: [FilePath] @@ -36,6 +38,7 @@ data RectifyOptions = RectifyOptions , _rectifyPackageVersionUpdate :: Maybe PackageVersionUpdate , _rectifyChecksums :: ChecksumsToRectify , _rectifyNewContributors :: Maybe [ContributorSpec] + --, _recitfyJannoRemoveEmptyCols :: Bool , _rectifyOnlyLatest :: Bool } @@ -55,7 +58,12 @@ data ChecksumsToRectify = } runRectify :: RectifyOptions -> PoseidonIO () -runRectify (RectifyOptions baseDirs ignorePosVer newPosVer pacVerUpdate checksumUpdate newContributors onlyLatest) = do +runRectify (RectifyOptions + baseDirs + ignorePosVer newPosVer pacVerUpdate checksumUpdate newContributors + --jannoRemoveEmptyCols + onlyLatest + ) = do let pacReadOpts = defaultPackageReadOptions { _readOptIgnoreChecksums = True , _readOptIgnoreGeno = True @@ -74,9 +82,14 @@ runRectify (RectifyOptions baseDirs ignorePosVer newPosVer pacVerUpdate checksum logInfo $ "Rectifying package: " ++ renderNameWithVersion inPac updatedPacPosVer <- updatePoseidonVersion newPosVer inPac updatedPacContri <- addContributors newContributors updatedPacPosVer + --updatedJanno <- updateJanno jannoRemoveEmptyCols (posPacJanno inPac) updatedPacChecksums <- updateChecksums checksumUpdate updatedPacContri completeAndWritePackage pacVerUpdate updatedPacChecksums +--updateJanno :: Bool -> JannoRows -> PoseidonIO JannoRows +--updateJanno removeEmptyCols janno = +-- janno & undefined + updatePoseidonVersion :: Maybe Version -> PoseidonPackage -> PoseidonIO PoseidonPackage updatePoseidonVersion Nothing pac = return pac updatePoseidonVersion (Just ver) pac = do diff --git a/src/Poseidon/CLI/Survey.hs b/src/Poseidon/CLI/Survey.hs index fed67e58..7c00454a 100644 --- a/src/Poseidon/CLI/Survey.hs +++ b/src/Poseidon/CLI/Survey.hs @@ -1,17 +1,14 @@ {-# LANGUAGE DataKinds #-} -{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE TypeOperators #-} module Poseidon.CLI.Survey where import Poseidon.BibFile (BibTeX) import Poseidon.GenotypeData (GenotypeDataSpec (..), GenotypeFileSpec (..)) -import Poseidon.Janno (CsvNamedRecord, GeneticSex, - JannoRows (..), ListColumn (..)) +import Poseidon.Janno (JannoRows (..), getFillStateForAllCols) import Poseidon.Package (PackageReadOptions (..), PoseidonPackage (..), defaultPackageReadOptions, @@ -21,12 +18,7 @@ import Poseidon.Utils (PoseidonIO, logInfo) import Control.Monad (forM) import Control.Monad.IO.Class (liftIO) import Data.List (intercalate, unfoldr, zip5) -import Data.Ratio (Ratio, (%)) -import Generics.SOP (All, Generic (Code, from), - HCollapse (hcollapse), - HPure (hpure), I, K (K), NP, - Proxy (..), SListI, hcmap, hzipWith, - unI, unSOP, unZ) +import Data.Ratio (Ratio) import Poseidon.SequencingSource (SeqSourceRows (..)) import System.Directory (doesFileExist) import System.FilePath (()) @@ -93,43 +85,14 @@ renderPackageWithCompleteness genoTypeDataExists janno (SeqSourceRows seqSource) renderJannoCompleteness :: JannoRows -> String renderJannoCompleteness (JannoRows rows) = - let ratioString = map prop2Char $ getRatiosForEachField rows + let ratioString = map prop2Char $ getFillStateForAllCols rows in init ratioString -- remove last entry covering the additional columns (CsvNamedRecord) - where - -- the following magic was heavily inspired by https://stackoverflow.com/a/41524511/3216883 - getRatiosForEachField :: (Generics.SOP.Generic a, Code a ~ '[ xs ], All PresenceCountable xs) => [a] -> [Ratio Int] - getRatiosForEachField = - hcollapse - . hcmap (Proxy :: Proxy PresenceCountable) (K . measureFillState) - . hunzip - . map (unZ . unSOP . from) - hunzip :: SListI xs => [NP I xs] -> NP [] xs - hunzip = foldr (hzipWith ((:) . unI)) (hpure []) - measureFillState :: PresenceCountable a => [a] -> Ratio Int - measureFillState vals = - let nrValues = length vals - nrFilledValues = sum $ map countPresence vals - in nrFilledValues % nrValues - prop2Char :: Ratio Int -> Char - prop2Char r - | r == 0 = '.' - | r < 0.25 = '░' - | r < 0.5 = '▒' - | r < 1 = '▓' - | r == 1 = '█' - | otherwise = '?' --- A typeclass to determine if a field in a .janno row is filled -class PresenceCountable a where - countPresence :: a -> Int -instance PresenceCountable (Maybe a) where - countPresence Nothing = 0 - countPresence (Just _) = 1 -instance PresenceCountable String where - countPresence _ = 1 -instance PresenceCountable GeneticSex where - countPresence _ = 1 -instance PresenceCountable (ListColumn a) where - countPresence _ = 1 -instance PresenceCountable CsvNamedRecord where - countPresence _ = 0 +prop2Char :: Ratio Int -> Char +prop2Char r + | r == 0 = '.' + | r < 0.25 = '░' + | r < 0.5 = '▒' + | r < 1 = '▓' + | r == 1 = '█' + | otherwise = '?' diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index a4772ba5..81d0c956 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -6,6 +6,9 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} +-- and these for the column fill state magic +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeOperators #-} module Poseidon.Janno ( JannoRow(..), @@ -41,7 +44,8 @@ module Poseidon.Janno ( parseCsvParseError, renderCsvParseError, getMaybeListColumn, - jannoRows2EigenstratIndEntries + jannoRows2EigenstratIndEntries, + getFillStateForAllCols ) where import Poseidon.ColumnTypes @@ -75,6 +79,13 @@ import SequenceFormats.Eigenstrat (EigenstratIndEntry (..), Sex (..)) import qualified Text.Parsec as P import qualified Text.Parsec.String as P +import Generics.SOP (All, --Generic (Code, from), + HCollapse (hcollapse), + HPure (hpure), I, K (K), NP, + Proxy (..), SListI, hcmap, hzipWith, + unI, unSOP, unZ) +import qualified Generics.SOP as GSOP +import Data.Ratio (Ratio, (%)) -- | A general datatype for janno list columns newtype ListColumn a = ListColumn {getListColumn :: [a]} @@ -392,6 +403,41 @@ createMinimalSample (EigenstratIndEntry id_ sex pop) = , jAdditionalColumns = CsvNamedRecord $ HM.fromList [] } + +-- Check fill state of each .janno column +-- heavily inspired by https://stackoverflow.com/a/41524511/3216883 + +-- | A function to measure how full the .janno columns are +getFillStateForAllCols :: (GSOP.Generic a, GSOP.Code a ~ '[ xs ], All PresenceCountable xs) => [a] -> [Ratio Int] -- ' +getFillStateForAllCols = + hcollapse + . hcmap (Proxy :: Proxy PresenceCountable) (K . measureFillState) + . hunzip + . map (unZ . unSOP . GSOP.from) + where + hunzip :: SListI xs => [NP I xs] -> NP [] xs + hunzip = foldr (hzipWith ((:) . unI)) (hpure []) + measureFillState :: PresenceCountable a => [a] -> Ratio Int + measureFillState vals = + let nrValues = length vals + nrFilledValues = sum $ map countPresence vals + in nrFilledValues % nrValues + +-- | A typeclass to determine if a field in a .janno row is filled +class PresenceCountable a where + countPresence :: a -> Int +instance PresenceCountable (Maybe a) where + countPresence Nothing = 0 + countPresence (Just _) = 1 +instance PresenceCountable String where + countPresence _ = 1 +instance PresenceCountable GeneticSex where + countPresence _ = 1 +instance PresenceCountable (ListColumn a) where + countPresence _ = 1 +instance PresenceCountable CsvNamedRecord where + countPresence _ = 0 + -- Janno file writing writeJannoFile :: FilePath -> JannoRows -> IO () From 17352c0830f4ad3ca4d45d60716adc99cec8efc6 Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Thu, 2 Jan 2025 19:37:48 +0100 Subject: [PATCH 02/10] thinking about how to more easily remove empty columns --- src/Poseidon/CLI/Rectify.hs | 10 ++----- src/Poseidon/CLI/Survey.hs | 3 -- src/Poseidon/Janno.hs | 56 ++++++++++++++++++++++++------------- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/Poseidon/CLI/Rectify.hs b/src/Poseidon/CLI/Rectify.hs index 6ab6e757..4bf7fadb 100644 --- a/src/Poseidon/CLI/Rectify.hs +++ b/src/Poseidon/CLI/Rectify.hs @@ -19,7 +19,7 @@ import Poseidon.Utils (PoseidonIO, getChecksum, logDebug, logInfo) import Poseidon.Version (VersionComponent (..), updateThreeComponentVersion) ---import Poseidon.Janno (JannoRows) +import Poseidon.Janno (JannoRows (..), getFillStateForAllCols) import Control.DeepSeq ((<$!!>)) import Control.Monad.IO.Class (MonadIO, liftIO) @@ -29,7 +29,7 @@ import Data.Time (UTCTime (..), getCurrentTime) import Data.Version (Version (..), makeVersion, showVersion) import System.Directory (doesFileExist, removeFile) import System.FilePath (()) ---import Data.Function ((&)) +import Data.Function ((&)) data RectifyOptions = RectifyOptions { _rectifyBaseDirs :: [FilePath] @@ -82,14 +82,9 @@ runRectify (RectifyOptions logInfo $ "Rectifying package: " ++ renderNameWithVersion inPac updatedPacPosVer <- updatePoseidonVersion newPosVer inPac updatedPacContri <- addContributors newContributors updatedPacPosVer - --updatedJanno <- updateJanno jannoRemoveEmptyCols (posPacJanno inPac) updatedPacChecksums <- updateChecksums checksumUpdate updatedPacContri completeAndWritePackage pacVerUpdate updatedPacChecksums ---updateJanno :: Bool -> JannoRows -> PoseidonIO JannoRows ---updateJanno removeEmptyCols janno = --- janno & undefined - updatePoseidonVersion :: Maybe Version -> PoseidonPackage -> PoseidonIO PoseidonPackage updatePoseidonVersion Nothing pac = return pac updatePoseidonVersion (Just ver) pac = do @@ -169,7 +164,6 @@ updateChecksums checksumSetting pac = do if e then Just <$!!> getChk file else return defaultChkSum - completeAndWritePackage :: Maybe PackageVersionUpdate -> PoseidonPackage -> PoseidonIO () completeAndWritePackage Nothing pac = do logDebug "Writing rectified POSEIDON.yml file" diff --git a/src/Poseidon/CLI/Survey.hs b/src/Poseidon/CLI/Survey.hs index 7c00454a..0b6a574f 100644 --- a/src/Poseidon/CLI/Survey.hs +++ b/src/Poseidon/CLI/Survey.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} module Poseidon.CLI.Survey where diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 81d0c956..673e6012 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -9,6 +9,7 @@ -- and these for the column fill state magic {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeOperators #-} +{-# LANGUAGE GADTs #-} module Poseidon.Janno ( JannoRow(..), @@ -68,7 +69,7 @@ import Data.Either (lefts, rights) import qualified Data.HashMap.Strict as HM import Data.List (elemIndex, foldl', intercalate, nub, sort, - (\\)) + (\\), transpose) import Data.Maybe (fromJust) import qualified Data.Text as T import qualified Data.Vector as V @@ -407,6 +408,21 @@ createMinimalSample (EigenstratIndEntry id_ sex pop) = -- Check fill state of each .janno column -- heavily inspired by https://stackoverflow.com/a/41524511/3216883 +-- | A typeclass to determine if a field in a .janno row is filled +class PresenceCountable a where + countPresence :: a -> Int +instance PresenceCountable (Maybe a) where + countPresence Nothing = 0 + countPresence (Just _) = 1 +instance PresenceCountable String where + countPresence _ = 1 +instance PresenceCountable GeneticSex where + countPresence _ = 1 +instance PresenceCountable (ListColumn a) where + countPresence _ = 1 +instance PresenceCountable CsvNamedRecord where + countPresence _ = 0 + -- | A function to measure how full the .janno columns are getFillStateForAllCols :: (GSOP.Generic a, GSOP.Code a ~ '[ xs ], All PresenceCountable xs) => [a] -> [Ratio Int] -- ' getFillStateForAllCols = @@ -423,28 +439,28 @@ getFillStateForAllCols = nrFilledValues = sum $ map countPresence vals in nrFilledValues % nrValues --- | A typeclass to determine if a field in a .janno row is filled -class PresenceCountable a where - countPresence :: a -> Int -instance PresenceCountable (Maybe a) where - countPresence Nothing = 0 - countPresence (Just _) = 1 -instance PresenceCountable String where - countPresence _ = 1 -instance PresenceCountable GeneticSex where - countPresence _ = 1 -instance PresenceCountable (ListColumn a) where - countPresence _ = 1 -instance PresenceCountable CsvNamedRecord where - countPresence _ = 0 - -- Janno file writing -writeJannoFile :: FilePath -> JannoRows -> IO () -writeJannoFile path (JannoRows rows) = do +writeJannoFile :: Bool -> FilePath -> JannoRows -> IO () +writeJannoFile removeEmptyCols path (JannoRows rows) = do let jannoAsBytestring = Csv.encodeByNameWith encodingOptions makeHeaderWithAdditionalColumns rows - let jannoAsBytestringwithNA = explicitNA jannoAsBytestring - Bch.writeFile path jannoAsBytestringwithNA + jannoAsBytestringwithNA = explicitNA jannoAsBytestring + if removeEmptyCols + then do + -- decode again + case Csv.decode Csv.NoHeader jannoAsBytestringwithNA :: Either String (V.Vector (V.Vector Bchs.ByteString)) of + Left _ -> error "internal error, please report" + Right x -> do + let janno = V.toList $ V.map V.toList x + jannoTransposed = transpose janno + jannoTransposedFiltered = filter (any (`notElem` ["", "n/a"]) . tail) jannoTransposed + jannoBackTransposed = transpose jannoTransposedFiltered + jannoConcat = Bchs.intercalate "\n" $ map (Bchs.intercalate "\t") jannoBackTransposed + Bchs.writeFile path jannoConcat + else do + -- just write to file + Bch.writeFile path jannoAsBytestringwithNA + where makeHeaderWithAdditionalColumns :: Csv.Header makeHeaderWithAdditionalColumns = From 94c8e5d4fb24c120503be13e6ec39084e62dc30a Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Fri, 3 Jan 2025 09:51:38 +0100 Subject: [PATCH 03/10] moved new janno writing functionality into an extra function writeJannoFileWithoutEmptyCols --- src/Poseidon/CLI/Rectify.hs | 10 ++++-- src/Poseidon/Janno.hs | 68 ++++++++++++++++++------------------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/Poseidon/CLI/Rectify.hs b/src/Poseidon/CLI/Rectify.hs index 4bf7fadb..997e83fb 100644 --- a/src/Poseidon/CLI/Rectify.hs +++ b/src/Poseidon/CLI/Rectify.hs @@ -19,7 +19,7 @@ import Poseidon.Utils (PoseidonIO, getChecksum, logDebug, logInfo) import Poseidon.Version (VersionComponent (..), updateThreeComponentVersion) -import Poseidon.Janno (JannoRows (..), getFillStateForAllCols) +import Poseidon.Janno (writeJannoFileWithoutEmptyCols) import Control.DeepSeq ((<$!!>)) import Control.Monad.IO.Class (MonadIO, liftIO) @@ -29,7 +29,7 @@ import Data.Time (UTCTime (..), getCurrentTime) import Data.Version (Version (..), makeVersion, showVersion) import System.Directory (doesFileExist, removeFile) import System.FilePath (()) -import Data.Function ((&)) +import Control.Monad (when) data RectifyOptions = RectifyOptions { _rectifyBaseDirs :: [FilePath] @@ -80,6 +80,12 @@ runRectify (RectifyOptions rectifyOnePackage :: PoseidonPackage -> PoseidonIO () rectifyOnePackage inPac = do logInfo $ "Rectifying package: " ++ renderNameWithVersion inPac + when True $ do + case posPacJannoFile inPac of + Nothing -> return () + Just path -> do + logInfo "Reorder and remove empty columns from .janno file" + liftIO $ writeJannoFileWithoutEmptyCols path (posPacJanno inPac) updatedPacPosVer <- updatePoseidonVersion newPosVer inPac updatedPacContri <- addContributors newContributors updatedPacPosVer updatedPacChecksums <- updateChecksums checksumUpdate updatedPacContri diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 673e6012..895dec8a 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -28,6 +28,7 @@ module Poseidon.Janno ( JannoRelationDegree (..), JannoLibraryBuilt (..), writeJannoFile, + writeJannoFileWithoutEmptyCols, readJannoFile, createMinimalJanno, createMinimalSample, @@ -441,30 +442,41 @@ getFillStateForAllCols = -- Janno file writing -writeJannoFile :: Bool -> FilePath -> JannoRows -> IO () -writeJannoFile removeEmptyCols path (JannoRows rows) = do - let jannoAsBytestring = Csv.encodeByNameWith encodingOptions makeHeaderWithAdditionalColumns rows +-- | A helper functions to replace empty bytestrings values in janno files with explicit "n/a" +explicitNA :: Bch.ByteString -> Bch.ByteString +explicitNA = replaceInJannoBytestring Bch.empty "n/a" + +replaceInJannoBytestring :: Bch.ByteString -> Bch.ByteString -> Bch.ByteString -> Bch.ByteString +replaceInJannoBytestring from to tsv = + let tsvRows = Bch.lines tsv + tsvCells = map (Bch.splitWith (=='\t')) tsvRows + tsvCellsUpdated = map (map (\y -> if y == from || y == Bch.append from "\r" then to else y)) tsvCells + tsvRowsUpdated = map (Bch.intercalate (Bch.pack "\t")) tsvCellsUpdated + in Bch.unlines tsvRowsUpdated + +makeHeaderWithAdditionalColumns :: [JannoRow] -> Csv.Header +makeHeaderWithAdditionalColumns rows = + V.fromList $ jannoHeader ++ sort (HM.keys (HM.unions (map (getCsvNR . jAdditionalColumns) rows))) + +writeJannoFile :: FilePath -> JannoRows -> IO () +writeJannoFile path (JannoRows rows) = do + let jannoAsBytestring = Csv.encodeByNameWith encodingOptions (makeHeaderWithAdditionalColumns rows) rows jannoAsBytestringwithNA = explicitNA jannoAsBytestring - if removeEmptyCols - then do - -- decode again - case Csv.decode Csv.NoHeader jannoAsBytestringwithNA :: Either String (V.Vector (V.Vector Bchs.ByteString)) of - Left _ -> error "internal error, please report" - Right x -> do - let janno = V.toList $ V.map V.toList x - jannoTransposed = transpose janno - jannoTransposedFiltered = filter (any (`notElem` ["", "n/a"]) . tail) jannoTransposed - jannoBackTransposed = transpose jannoTransposedFiltered - jannoConcat = Bchs.intercalate "\n" $ map (Bchs.intercalate "\t") jannoBackTransposed - Bchs.writeFile path jannoConcat - else do - -- just write to file - Bch.writeFile path jannoAsBytestringwithNA + Bch.writeFile path jannoAsBytestringwithNA - where - makeHeaderWithAdditionalColumns :: Csv.Header - makeHeaderWithAdditionalColumns = - V.fromList $ jannoHeader ++ sort (HM.keys (HM.unions (map (getCsvNR . jAdditionalColumns) rows))) +writeJannoFileWithoutEmptyCols :: FilePath -> JannoRows -> IO () +writeJannoFileWithoutEmptyCols path (JannoRows rows) = do + let jannoAsBytestring = Csv.encodeByNameWith encodingOptions (makeHeaderWithAdditionalColumns rows) rows + jannoAsBytestringwithNA = explicitNA jannoAsBytestring + case Csv.decode Csv.NoHeader jannoAsBytestringwithNA :: Either String (V.Vector (V.Vector Bch.ByteString)) of + Left _ -> error "internal error, please report" + Right x -> do + let janno = V.toList $ V.map V.toList x + jannoTransposed = transpose janno + jannoTransposedFiltered = filter (any (/= "n/a") . tail) jannoTransposed + jannoBackTransposed = transpose jannoTransposedFiltered + jannoConcat = Bch.intercalate "\r" $ map (Bch.intercalate "\t") jannoBackTransposed + Bch.writeFile path jannoConcat encodingOptions :: Csv.EncodeOptions encodingOptions = Csv.defaultEncodeOptions { @@ -590,18 +602,6 @@ renderCsvParseError (CsvParseError expected actual leftover) = "broken value: " ++ actual ++ ", " ++ "problematic characters: " ++ show leftover ++ ")" --- | A helper functions to replace empty bytestrings values in janno files with explicit "n/a" -explicitNA :: Bch.ByteString -> Bch.ByteString -explicitNA = replaceInJannoBytestring Bch.empty "n/a" - -replaceInJannoBytestring :: Bch.ByteString -> Bch.ByteString -> Bch.ByteString -> Bch.ByteString -replaceInJannoBytestring from to tsv = - let tsvRows = Bch.lines tsv - tsvCells = map (Bch.splitWith (=='\t')) tsvRows - tsvCellsUpdated = map (map (\y -> if y == from || y == Bch.append from "\r" then to else y)) tsvCells - tsvRowsUpdated = map (Bch.intercalate (Bch.pack "\t")) tsvCellsUpdated - in Bch.unlines tsvRowsUpdated - -- Global janno consistency checks checkJannoConsistency :: FilePath -> JannoRows -> Either PoseidonException JannoRows From 0684309b7461d6bff7ac2c2a1d5ec814cbfc064b Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Fri, 3 Jan 2025 09:57:59 +0100 Subject: [PATCH 04/10] brought the old survey module back and moved getFillStateForAllCols back out of the janno module --- src/Poseidon/CLI/Survey.hs | 62 +++++++++++++++++++++++++++++++------- src/Poseidon/Janno.hs | 49 +----------------------------- 2 files changed, 52 insertions(+), 59 deletions(-) diff --git a/src/Poseidon/CLI/Survey.hs b/src/Poseidon/CLI/Survey.hs index 0b6a574f..d9bc9f45 100644 --- a/src/Poseidon/CLI/Survey.hs +++ b/src/Poseidon/CLI/Survey.hs @@ -1,11 +1,17 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeOperators #-} module Poseidon.CLI.Survey where import Poseidon.BibFile (BibTeX) import Poseidon.GenotypeData (GenotypeDataSpec (..), GenotypeFileSpec (..)) -import Poseidon.Janno (JannoRows (..), getFillStateForAllCols) +import Poseidon.Janno (CsvNamedRecord, GeneticSex, + JannoRows (..), ListColumn (..)) import Poseidon.Package (PackageReadOptions (..), PoseidonPackage (..), defaultPackageReadOptions, @@ -15,7 +21,12 @@ import Poseidon.Utils (PoseidonIO, logInfo) import Control.Monad (forM) import Control.Monad.IO.Class (liftIO) import Data.List (intercalate, unfoldr, zip5) -import Data.Ratio (Ratio) +import Data.Ratio (Ratio, (%)) +import Generics.SOP (All, Generic (Code, from), + HCollapse (hcollapse), + HPure (hpure), I, K (K), NP, + Proxy (..), SListI, hcmap, hzipWith, + unI, unSOP, unZ) import Poseidon.SequencingSource (SeqSourceRows (..)) import System.Directory (doesFileExist) import System.FilePath (()) @@ -82,14 +93,43 @@ renderPackageWithCompleteness genoTypeDataExists janno (SeqSourceRows seqSource) renderJannoCompleteness :: JannoRows -> String renderJannoCompleteness (JannoRows rows) = - let ratioString = map prop2Char $ getFillStateForAllCols rows + let ratioString = map prop2Char $ getRatiosForEachField rows in init ratioString -- remove last entry covering the additional columns (CsvNamedRecord) + where + -- the following magic was heavily inspired by https://stackoverflow.com/a/41524511/3216883 + getRatiosForEachField :: (Generics.SOP.Generic a, Code a ~ '[ xs ], All PresenceCountable xs) => [a] -> [Ratio Int] --' + getRatiosForEachField = + hcollapse + . hcmap (Proxy :: Proxy PresenceCountable) (K . measureFillState) + . hunzip + . map (unZ . unSOP . from) + hunzip :: SListI xs => [NP I xs] -> NP [] xs + hunzip = foldr (hzipWith ((:) . unI)) (hpure []) + measureFillState :: PresenceCountable a => [a] -> Ratio Int + measureFillState vals = + let nrValues = length vals + nrFilledValues = sum $ map countPresence vals + in nrFilledValues % nrValues + prop2Char :: Ratio Int -> Char + prop2Char r + | r == 0 = '.' + | r < 0.25 = '░' + | r < 0.5 = '▒' + | r < 1 = '▓' + | r == 1 = '█' + | otherwise = '?' -prop2Char :: Ratio Int -> Char -prop2Char r - | r == 0 = '.' - | r < 0.25 = '░' - | r < 0.5 = '▒' - | r < 1 = '▓' - | r == 1 = '█' - | otherwise = '?' +-- A typeclass to determine if a field in a .janno row is filled +class PresenceCountable a where + countPresence :: a -> Int +instance PresenceCountable (Maybe a) where + countPresence Nothing = 0 + countPresence (Just _) = 1 +instance PresenceCountable String where + countPresence _ = 1 +instance PresenceCountable GeneticSex where + countPresence _ = 1 +instance PresenceCountable (ListColumn a) where + countPresence _ = 1 +instance PresenceCountable CsvNamedRecord where + countPresence _ = 0 diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 895dec8a..85362f1d 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -6,10 +6,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} --- and these for the column fill state magic -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TypeOperators #-} -{-# LANGUAGE GADTs #-} module Poseidon.Janno ( JannoRow(..), @@ -46,8 +42,7 @@ module Poseidon.Janno ( parseCsvParseError, renderCsvParseError, getMaybeListColumn, - jannoRows2EigenstratIndEntries, - getFillStateForAllCols + jannoRows2EigenstratIndEntries ) where import Poseidon.ColumnTypes @@ -81,13 +76,6 @@ import SequenceFormats.Eigenstrat (EigenstratIndEntry (..), Sex (..)) import qualified Text.Parsec as P import qualified Text.Parsec.String as P -import Generics.SOP (All, --Generic (Code, from), - HCollapse (hcollapse), - HPure (hpure), I, K (K), NP, - Proxy (..), SListI, hcmap, hzipWith, - unI, unSOP, unZ) -import qualified Generics.SOP as GSOP -import Data.Ratio (Ratio, (%)) -- | A general datatype for janno list columns newtype ListColumn a = ListColumn {getListColumn :: [a]} @@ -405,41 +393,6 @@ createMinimalSample (EigenstratIndEntry id_ sex pop) = , jAdditionalColumns = CsvNamedRecord $ HM.fromList [] } - --- Check fill state of each .janno column --- heavily inspired by https://stackoverflow.com/a/41524511/3216883 - --- | A typeclass to determine if a field in a .janno row is filled -class PresenceCountable a where - countPresence :: a -> Int -instance PresenceCountable (Maybe a) where - countPresence Nothing = 0 - countPresence (Just _) = 1 -instance PresenceCountable String where - countPresence _ = 1 -instance PresenceCountable GeneticSex where - countPresence _ = 1 -instance PresenceCountable (ListColumn a) where - countPresence _ = 1 -instance PresenceCountable CsvNamedRecord where - countPresence _ = 0 - --- | A function to measure how full the .janno columns are -getFillStateForAllCols :: (GSOP.Generic a, GSOP.Code a ~ '[ xs ], All PresenceCountable xs) => [a] -> [Ratio Int] -- ' -getFillStateForAllCols = - hcollapse - . hcmap (Proxy :: Proxy PresenceCountable) (K . measureFillState) - . hunzip - . map (unZ . unSOP . GSOP.from) - where - hunzip :: SListI xs => [NP I xs] -> NP [] xs - hunzip = foldr (hzipWith ((:) . unI)) (hpure []) - measureFillState :: PresenceCountable a => [a] -> Ratio Int - measureFillState vals = - let nrValues = length vals - nrFilledValues = sum $ map countPresence vals - in nrFilledValues % nrValues - -- Janno file writing -- | A helper functions to replace empty bytestrings values in janno files with explicit "n/a" From 0f6ac12a9496a6dc872e92d6f9ad665be25e37e0 Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Fri, 3 Jan 2025 10:16:54 +0100 Subject: [PATCH 05/10] fixing bugs --- src/Poseidon/Janno.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 85362f1d..e8e3e315 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -421,15 +421,15 @@ writeJannoFileWithoutEmptyCols :: FilePath -> JannoRows -> IO () writeJannoFileWithoutEmptyCols path (JannoRows rows) = do let jannoAsBytestring = Csv.encodeByNameWith encodingOptions (makeHeaderWithAdditionalColumns rows) rows jannoAsBytestringwithNA = explicitNA jannoAsBytestring - case Csv.decode Csv.NoHeader jannoAsBytestringwithNA :: Either String (V.Vector (V.Vector Bch.ByteString)) of + case Csv.decodeWith decodingOptions Csv.NoHeader jannoAsBytestringwithNA :: Either String (V.Vector (V.Vector Bch.ByteString)) of Left _ -> error "internal error, please report" Right x -> do let janno = V.toList $ V.map V.toList x jannoTransposed = transpose janno jannoTransposedFiltered = filter (any (/= "n/a") . tail) jannoTransposed jannoBackTransposed = transpose jannoTransposedFiltered - jannoConcat = Bch.intercalate "\r" $ map (Bch.intercalate "\t") jannoBackTransposed - Bch.writeFile path jannoConcat + jannoConcat = Bch.intercalate "\n" $ map (Bch.intercalate "\t") jannoBackTransposed + Bch.writeFile path (jannoConcat <> "\n") encodingOptions :: Csv.EncodeOptions encodingOptions = Csv.defaultEncodeOptions { From fe58949eb642762d8c31ff0dabf7c9a1aa7a2217 Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Fri, 3 Jan 2025 10:34:23 +0100 Subject: [PATCH 06/10] added interface option for the .janno modification --- src-executables/Main-trident.hs | 1 + src/Poseidon/CLI/OptparseApplicativeParsers.hs | 8 +++++++- src/Poseidon/CLI/Rectify.hs | 13 +++++++------ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src-executables/Main-trident.hs b/src-executables/Main-trident.hs index e5c6348b..a83e2934 100644 --- a/src-executables/Main-trident.hs +++ b/src-executables/Main-trident.hs @@ -243,6 +243,7 @@ rectifyOptParser = RectifyOptions <$> parseBasePaths <*> parseMaybePackageVersionUpdate <*> parseChecksumsToRectify <*> parseMaybeContributors + <*> parseJannoRemoveEmptyCols <*> parseOnlyLatest validateOptParser :: OP.Parser ValidateOptions diff --git a/src/Poseidon/CLI/OptparseApplicativeParsers.hs b/src/Poseidon/CLI/OptparseApplicativeParsers.hs index a3a82fe3..d0a5b11b 100644 --- a/src/Poseidon/CLI/OptparseApplicativeParsers.hs +++ b/src/Poseidon/CLI/OptparseApplicativeParsers.hs @@ -236,7 +236,13 @@ readContributorString s = case P.runParser contributorSpecParser () "" s of Left p -> Left (showParsecErr p) Right x -> Right x - +parseJannoRemoveEmptyCols :: OP.Parser Bool +parseJannoRemoveEmptyCols = OP.switch ( + OP.long "jannoRemoveEmpty" <> + OP.help "Reorder the .janno file and remove empty colums. \ + \Remember to pair this option with --checksumJanno to also update the checksum." + ) + parseMaybeLog :: OP.Parser (Maybe String) parseMaybeLog = OP.option (Just <$> OP.str) ( OP.long "logText" <> diff --git a/src/Poseidon/CLI/Rectify.hs b/src/Poseidon/CLI/Rectify.hs index 997e83fb..b4f8d595 100644 --- a/src/Poseidon/CLI/Rectify.hs +++ b/src/Poseidon/CLI/Rectify.hs @@ -16,7 +16,7 @@ import Poseidon.Package (PackageReadOptions (..), readPoseidonPackageCollection, writePoseidonPackage) import Poseidon.Utils (PoseidonIO, getChecksum, logDebug, - logInfo) + logInfo, logWarning) import Poseidon.Version (VersionComponent (..), updateThreeComponentVersion) import Poseidon.Janno (writeJannoFileWithoutEmptyCols) @@ -38,7 +38,7 @@ data RectifyOptions = RectifyOptions , _rectifyPackageVersionUpdate :: Maybe PackageVersionUpdate , _rectifyChecksums :: ChecksumsToRectify , _rectifyNewContributors :: Maybe [ContributorSpec] - --, _recitfyJannoRemoveEmptyCols :: Bool + , _recitfyJannoRemoveEmptyCols :: Bool , _rectifyOnlyLatest :: Bool } @@ -61,7 +61,7 @@ runRectify :: RectifyOptions -> PoseidonIO () runRectify (RectifyOptions baseDirs ignorePosVer newPosVer pacVerUpdate checksumUpdate newContributors - --jannoRemoveEmptyCols + jannoRemoveEmptyCols onlyLatest ) = do let pacReadOpts = defaultPackageReadOptions { @@ -80,11 +80,12 @@ runRectify (RectifyOptions rectifyOnePackage :: PoseidonPackage -> PoseidonIO () rectifyOnePackage inPac = do logInfo $ "Rectifying package: " ++ renderNameWithVersion inPac - when True $ do + when jannoRemoveEmptyCols $ do case posPacJannoFile inPac of - Nothing -> return () + Nothing -> do + logWarning "No .janno file to modify with --jannoRemoveEmpty" Just path -> do - logInfo "Reorder and remove empty columns from .janno file" + logInfo "Reordering and removing empty columns from .janno file" liftIO $ writeJannoFileWithoutEmptyCols path (posPacJanno inPac) updatedPacPosVer <- updatePoseidonVersion newPosVer inPac updatedPacContri <- addContributors newContributors updatedPacPosVer From 3aaccbd7afec3aced4bea815e3dea4786334007d Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Fri, 3 Jan 2025 11:20:02 +0100 Subject: [PATCH 07/10] update of tests --- src/Poseidon/CLI/Rectify.hs | 6 ++--- .../GoldenTestCheckSumFile.txt | 5 ++++- .../chronicle/Schiffels/POSEIDON.yml | 2 +- .../chronicle/Schiffels/Schiffels.janno | 22 +++++++++---------- .../GoldenTestData/chronicle/chronicle2.yml | 14 ++++++------ .../init/Schiffels/POSEIDON.yml | 2 +- .../init/Schiffels/Schiffels.janno | 22 +++++++++---------- .../timetravel/Schiffels-1.1.1/POSEIDON.yml | 2 +- .../Schiffels-1.1.1/Schiffels.janno | 22 +++++++++---------- .../GoldenTestsRunCommands.hs | 19 ++++++++++++++++ 10 files changed, 69 insertions(+), 47 deletions(-) diff --git a/src/Poseidon/CLI/Rectify.hs b/src/Poseidon/CLI/Rectify.hs index b4f8d595..bb27017e 100644 --- a/src/Poseidon/CLI/Rectify.hs +++ b/src/Poseidon/CLI/Rectify.hs @@ -38,7 +38,7 @@ data RectifyOptions = RectifyOptions , _rectifyPackageVersionUpdate :: Maybe PackageVersionUpdate , _rectifyChecksums :: ChecksumsToRectify , _rectifyNewContributors :: Maybe [ContributorSpec] - , _recitfyJannoRemoveEmptyCols :: Bool + , _rectifyJannoRemoveEmptyCols :: Bool , _rectifyOnlyLatest :: Bool } @@ -84,9 +84,9 @@ runRectify (RectifyOptions case posPacJannoFile inPac of Nothing -> do logWarning "No .janno file to modify with --jannoRemoveEmpty" - Just path -> do + Just jannoPath -> do logInfo "Reordering and removing empty columns from .janno file" - liftIO $ writeJannoFileWithoutEmptyCols path (posPacJanno inPac) + liftIO $ writeJannoFileWithoutEmptyCols (posPacBaseDir inPac jannoPath) (posPacJanno inPac) updatedPacPosVer <- updatePoseidonVersion newPosVer inPac updatedPacContri <- addContributors newContributors updatedPacPosVer updatedPacChecksums <- updateChecksums checksumUpdate updatedPacContri diff --git a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt index 415bb16f..6628f966 100644 --- a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt +++ b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt @@ -55,6 +55,9 @@ da981f8d52f60ec4d96865b224648c92 rectify init/Schiffels/POSEIDON.yml 3bb396e099d5b8771a3409f5fe85d70b rectify init/Schiffels/CHANGELOG.md dc322649188ce2995cea8a46a7f97f3e rectify init/Schiffels/POSEIDON.yml 3bb396e099d5b8771a3409f5fe85d70b rectify init/Schiffels/CHANGELOG.md +a61f78b4e9b7e3c7e00ec7bc6aaab95b rectify init/Schiffels/POSEIDON.yml +3bb396e099d5b8771a3409f5fe85d70b rectify init/Schiffels/CHANGELOG.md +083fe7ef4206c979356a3a2454d780b1 rectify init/Schiffels/Schiffels.janno 2757f727e02dd6453fffe68c4c6ec4c8 forge forge/ForgePac1/POSEIDON.yml 1286a2580e4bfbed7d804d5f3fe125f7 forge forge/ForgePac1/ForgePac1.geno 8846333d9a1de6510f25a3816cc70fef forge forge/ForgePac1/ForgePac1.janno @@ -125,7 +128,7 @@ e375863bca9e4a91c9855396abde31c7 forge forge/ForgePac20/ForgePac20.janno d4a05cfef045648238a94a9d621cf667 chronicle chronicle/chronicle1.yml b43da4d5734371c0648553120f812466 timetravel timetravel/Lamnidis_2018-1.0.0/POSEIDON.yml 8d57ce1a1ab28c0d8a5f391dd790a59c timetravel timetravel/Lamnidis_2018-1.0.1/POSEIDON.yml -dc322649188ce2995cea8a46a7f97f3e timetravel timetravel/Schiffels-1.1.1/POSEIDON.yml +a61f78b4e9b7e3c7e00ec7bc6aaab95b timetravel timetravel/Schiffels-1.1.1/POSEIDON.yml 1ab24c45ef3a13e0fb34afac7a21dca8 timetravel timetravel/Schmid_2028-1.0.0/POSEIDON.yml 8d57ce1a1ab28c0d8a5f391dd790a59c fetch fetch/by_package/Lamnidis_2018-1.0.1/POSEIDON.yml 1ab24c45ef3a13e0fb34afac7a21dca8 fetch fetch/by_package/Schmid_2028-1.0.0/POSEIDON.yml diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/POSEIDON.yml index 1feb73d5..1e71dd7f 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/POSEIDON.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/POSEIDON.yml @@ -16,7 +16,7 @@ genotypeData: indFile: ind.txt snpSet: Other jannoFile: Schiffels.janno -jannoFileChkSum: fd632717ecaf337a39cfd7a828a54e99 +jannoFileChkSum: 083fe7ef4206c979356a3a2454d780b1 bibFile: Schiffels.bib bibFileChkSum: 9edc4a757f785a8ecb59c54d16c5690a changelogFile: CHANGELOG.md diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/Schiffels.janno b/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/Schiffels.janno index 3bf96b2c..c80c86ca 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/Schiffels.janno +++ b/test/PoseidonGoldenTests/GoldenTestData/chronicle/Schiffels/Schiffels.janno @@ -1,11 +1,11 @@ -Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue Nr_Libraries Library_Names Capture_Type UDG Library_Built Genotype_Ploidy Data_Preparation_Pipeline_URL Endogenous Nr_SNPs Coverage_on_Target_SNPs Damage Contamination Contamination_Err Contamination_Meas Contamination_Note Genetic_Source_Accession_IDs Primary_Contact Publication Note Keywords -XXX001 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX002 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX003 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX004 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX005 M POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX006 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX007 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX008 F POP3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX009 F POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX010 M POP3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a +Poseidon_ID Genetic_Sex Group_Name +XXX001 M POP1 +XXX002 F POP2 +XXX003 M POP1 +XXX004 F POP2 +XXX005 M POP2 +XXX006 F POP2 +XXX007 M POP1 +XXX008 F POP3 +XXX009 F POP1 +XXX010 M POP3 diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml index eca10705..9eaa28a6 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml @@ -1,29 +1,29 @@ title: Chronicle title description: Chronicle description chronicleVersion: 0.2.0 -lastModified: 2024-11-13 +lastModified: 2025-01-03 packages: - title: Lamnidis_2018 version: 1.0.0 - commit: c59bfb82fec3f2742cc0e10ceb2932ee06e56aa1 + commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 path: Lamnidis_2018 - title: Lamnidis_2018 version: 1.0.1 - commit: c59bfb82fec3f2742cc0e10ceb2932ee06e56aa1 + commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 path: Lamnidis_2018_newVersion - title: Schiffels version: 1.1.1 - commit: a32a46cf82b8895af72c8920be4ca4843cd5e7f7 + commit: cf3deedf474ef0a651fdcfe5e92085e7810cb816 path: Schiffels - title: Schiffels_2016 version: 1.0.1 - commit: c59bfb82fec3f2742cc0e10ceb2932ee06e56aa1 + commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 path: Schiffels_2016 - title: Schmid_2028 version: 1.0.0 - commit: c59bfb82fec3f2742cc0e10ceb2932ee06e56aa1 + commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 path: Schmid_2028 - title: Wang_2020 version: 0.1.0 - commit: c59bfb82fec3f2742cc0e10ceb2932ee06e56aa1 + commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 path: Wang_2020 diff --git a/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/POSEIDON.yml index 1feb73d5..1e71dd7f 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/POSEIDON.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/POSEIDON.yml @@ -16,7 +16,7 @@ genotypeData: indFile: ind.txt snpSet: Other jannoFile: Schiffels.janno -jannoFileChkSum: fd632717ecaf337a39cfd7a828a54e99 +jannoFileChkSum: 083fe7ef4206c979356a3a2454d780b1 bibFile: Schiffels.bib bibFileChkSum: 9edc4a757f785a8ecb59c54d16c5690a changelogFile: CHANGELOG.md diff --git a/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/Schiffels.janno b/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/Schiffels.janno index 3bf96b2c..c80c86ca 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/Schiffels.janno +++ b/test/PoseidonGoldenTests/GoldenTestData/init/Schiffels/Schiffels.janno @@ -1,11 +1,11 @@ -Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue Nr_Libraries Library_Names Capture_Type UDG Library_Built Genotype_Ploidy Data_Preparation_Pipeline_URL Endogenous Nr_SNPs Coverage_on_Target_SNPs Damage Contamination Contamination_Err Contamination_Meas Contamination_Note Genetic_Source_Accession_IDs Primary_Contact Publication Note Keywords -XXX001 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX002 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX003 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX004 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX005 M POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX006 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX007 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX008 F POP3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX009 F POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX010 M POP3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a +Poseidon_ID Genetic_Sex Group_Name +XXX001 M POP1 +XXX002 F POP2 +XXX003 M POP1 +XXX004 F POP2 +XXX005 M POP2 +XXX006 F POP2 +XXX007 M POP1 +XXX008 F POP3 +XXX009 F POP1 +XXX010 M POP3 diff --git a/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/POSEIDON.yml index 1feb73d5..1e71dd7f 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/POSEIDON.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/POSEIDON.yml @@ -16,7 +16,7 @@ genotypeData: indFile: ind.txt snpSet: Other jannoFile: Schiffels.janno -jannoFileChkSum: fd632717ecaf337a39cfd7a828a54e99 +jannoFileChkSum: 083fe7ef4206c979356a3a2454d780b1 bibFile: Schiffels.bib bibFileChkSum: 9edc4a757f785a8ecb59c54d16c5690a changelogFile: CHANGELOG.md diff --git a/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/Schiffels.janno b/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/Schiffels.janno index 3bf96b2c..c80c86ca 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/Schiffels.janno +++ b/test/PoseidonGoldenTests/GoldenTestData/timetravel/Schiffels-1.1.1/Schiffels.janno @@ -1,11 +1,11 @@ -Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue Nr_Libraries Library_Names Capture_Type UDG Library_Built Genotype_Ploidy Data_Preparation_Pipeline_URL Endogenous Nr_SNPs Coverage_on_Target_SNPs Damage Contamination Contamination_Err Contamination_Meas Contamination_Note Genetic_Source_Accession_IDs Primary_Contact Publication Note Keywords -XXX001 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX002 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX003 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX004 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX005 M POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX006 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX007 M POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX008 F POP3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX009 F POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a -XXX010 M POP3 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a +Poseidon_ID Genetic_Sex Group_Name +XXX001 M POP1 +XXX002 F POP2 +XXX003 M POP1 +XXX004 F POP2 +XXX005 M POP2 +XXX006 F POP2 +XXX007 M POP1 +XXX008 F POP3 +XXX009 F POP1 +XXX010 M POP3 diff --git a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs index 8462a70a..2f9fbfe7 100644 --- a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs +++ b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs @@ -554,6 +554,7 @@ testPipelineRectify testDir checkFilePath = do , _rectifyPackageVersionUpdate = Just (PackageVersionUpdate Major (Just "test1")) , _rectifyChecksums = ChecksumNone , _rectifyNewContributors = Nothing + , _rectifyJannoRemoveEmptyCols = False , _rectifyOnlyLatest = False } let action1 = testLog (runRectify rectifyOpts1) >> patchLastModified testDir ("init" "Schiffels" "POSEIDON.yml") @@ -568,6 +569,7 @@ testPipelineRectify testDir checkFilePath = do , _rectifyPackageVersionUpdate = Just (PackageVersionUpdate Minor (Just "test2")) , _rectifyChecksums = ChecksumAll , _rectifyNewContributors = Nothing + , _rectifyJannoRemoveEmptyCols = False , _rectifyOnlyLatest = False } let action2 = testLog (runRectify rectifyOpts2) >> patchLastModified testDir ("init" "Schiffels" "POSEIDON.yml") @@ -585,6 +587,7 @@ testPipelineRectify testDir checkFilePath = do ContributorSpec "Josiah Carberry" "carberry@brown.edu" (Just $ ORCID {_orcidNums = "000000021825009", _orcidChecksum = '7'}) , ContributorSpec "Herbert Testmann" "herbert@testmann.tw" Nothing ] + , _rectifyJannoRemoveEmptyCols = False , _rectifyOnlyLatest = False } let action3 = testLog (runRectify rectifyOpts3) >> patchLastModified testDir ("init" "Schiffels" "POSEIDON.yml") @@ -592,6 +595,22 @@ testPipelineRectify testDir checkFilePath = do "init" "Schiffels" "POSEIDON.yml" , "init" "Schiffels" "CHANGELOG.md" ] + let rectifyOpts4 = RectifyOptions { + _rectifyBaseDirs = [testDir "init" "Schiffels"] + , _rectifyPoseidonVersion = Nothing + , _rectifyIgnorePoseidonVersion = False + , _rectifyPackageVersionUpdate = Nothing + , _rectifyChecksums = ChecksumAll + , _rectifyNewContributors = Nothing + , _rectifyJannoRemoveEmptyCols = True + , _rectifyOnlyLatest = False + } + let action4 = testLog (runRectify rectifyOpts4) >> patchLastModified testDir ("init" "Schiffels" "POSEIDON.yml") + runAndChecksumFiles checkFilePath testDir action4 "rectify" [ + "init" "Schiffels" "POSEIDON.yml" + , "init" "Schiffels" "CHANGELOG.md" + , "init" "Schiffels" "Schiffels.janno" + ] testPipelineForge :: FilePath -> FilePath -> IO () testPipelineForge testDir checkFilePath = do From bfdcdbc9da49a05d252f802afadc956c30ebac5c Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Fri, 3 Jan 2025 11:22:52 +0100 Subject: [PATCH 08/10] stylish haskell --- src/Poseidon/CLI/OptparseApplicativeParsers.hs | 2 +- src/Poseidon/CLI/Rectify.hs | 4 ++-- src/Poseidon/Janno.hs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Poseidon/CLI/OptparseApplicativeParsers.hs b/src/Poseidon/CLI/OptparseApplicativeParsers.hs index d0a5b11b..64b38fc1 100644 --- a/src/Poseidon/CLI/OptparseApplicativeParsers.hs +++ b/src/Poseidon/CLI/OptparseApplicativeParsers.hs @@ -242,7 +242,7 @@ parseJannoRemoveEmptyCols = OP.switch ( OP.help "Reorder the .janno file and remove empty colums. \ \Remember to pair this option with --checksumJanno to also update the checksum." ) - + parseMaybeLog :: OP.Parser (Maybe String) parseMaybeLog = OP.option (Just <$> OP.str) ( OP.long "logText" <> diff --git a/src/Poseidon/CLI/Rectify.hs b/src/Poseidon/CLI/Rectify.hs index bb27017e..72583811 100644 --- a/src/Poseidon/CLI/Rectify.hs +++ b/src/Poseidon/CLI/Rectify.hs @@ -10,6 +10,7 @@ import Poseidon.EntityTypes (HasNameAndVersion (..), renderNameWithVersion) import Poseidon.GenotypeData (GenotypeDataSpec (..), GenotypeFileSpec (..)) +import Poseidon.Janno (writeJannoFileWithoutEmptyCols) import Poseidon.Package (PackageReadOptions (..), PoseidonPackage (..), defaultPackageReadOptions, @@ -19,9 +20,9 @@ import Poseidon.Utils (PoseidonIO, getChecksum, logDebug, logInfo, logWarning) import Poseidon.Version (VersionComponent (..), updateThreeComponentVersion) -import Poseidon.Janno (writeJannoFileWithoutEmptyCols) import Control.DeepSeq ((<$!!>)) +import Control.Monad (when) import Control.Monad.IO.Class (MonadIO, liftIO) import Data.List (nub) import Data.Maybe (fromJust) @@ -29,7 +30,6 @@ import Data.Time (UTCTime (..), getCurrentTime) import Data.Version (Version (..), makeVersion, showVersion) import System.Directory (doesFileExist, removeFile) import System.FilePath (()) -import Control.Monad (when) data RectifyOptions = RectifyOptions { _rectifyBaseDirs :: [FilePath] diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index e8e3e315..55d27f2b 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -65,7 +65,7 @@ import Data.Either (lefts, rights) import qualified Data.HashMap.Strict as HM import Data.List (elemIndex, foldl', intercalate, nub, sort, - (\\), transpose) + transpose, (\\)) import Data.Maybe (fromJust) import qualified Data.Text as T import qualified Data.Vector as V From 4bda57cf4fe9e745824bd85a39c9a52b02d81e17 Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Thu, 9 Jan 2025 16:41:00 +0100 Subject: [PATCH 09/10] trying to improve the explicitNA mechanism --- src/Poseidon/Janno.hs | 23 ++++++----------------- src/Poseidon/SequencingSource.hs | 5 ++--- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 55d27f2b..daa3b752 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -283,8 +283,11 @@ cleanInput (Just rawInputBS) = transNA rawInputBS transNA "n/a" = Nothing transNA x = Just x +explicitNA :: Csv.NamedRecord -> Csv.NamedRecord +explicitNA = HM.map (\x -> if x == "" then "n/a" else x) + instance Csv.ToNamedRecord JannoRow where - toNamedRecord j = Csv.namedRecord [ + toNamedRecord j = explicitNA $ Csv.namedRecord [ "Poseidon_ID" Csv..= jPoseidonID j , "Genetic_Sex" Csv..= jGeneticSex j , "Group_Name" Csv..= jGroupName j @@ -395,18 +398,6 @@ createMinimalSample (EigenstratIndEntry id_ sex pop) = -- Janno file writing --- | A helper functions to replace empty bytestrings values in janno files with explicit "n/a" -explicitNA :: Bch.ByteString -> Bch.ByteString -explicitNA = replaceInJannoBytestring Bch.empty "n/a" - -replaceInJannoBytestring :: Bch.ByteString -> Bch.ByteString -> Bch.ByteString -> Bch.ByteString -replaceInJannoBytestring from to tsv = - let tsvRows = Bch.lines tsv - tsvCells = map (Bch.splitWith (=='\t')) tsvRows - tsvCellsUpdated = map (map (\y -> if y == from || y == Bch.append from "\r" then to else y)) tsvCells - tsvRowsUpdated = map (Bch.intercalate (Bch.pack "\t")) tsvCellsUpdated - in Bch.unlines tsvRowsUpdated - makeHeaderWithAdditionalColumns :: [JannoRow] -> Csv.Header makeHeaderWithAdditionalColumns rows = V.fromList $ jannoHeader ++ sort (HM.keys (HM.unions (map (getCsvNR . jAdditionalColumns) rows))) @@ -414,14 +405,12 @@ makeHeaderWithAdditionalColumns rows = writeJannoFile :: FilePath -> JannoRows -> IO () writeJannoFile path (JannoRows rows) = do let jannoAsBytestring = Csv.encodeByNameWith encodingOptions (makeHeaderWithAdditionalColumns rows) rows - jannoAsBytestringwithNA = explicitNA jannoAsBytestring - Bch.writeFile path jannoAsBytestringwithNA + Bch.writeFile path jannoAsBytestring writeJannoFileWithoutEmptyCols :: FilePath -> JannoRows -> IO () writeJannoFileWithoutEmptyCols path (JannoRows rows) = do let jannoAsBytestring = Csv.encodeByNameWith encodingOptions (makeHeaderWithAdditionalColumns rows) rows - jannoAsBytestringwithNA = explicitNA jannoAsBytestring - case Csv.decodeWith decodingOptions Csv.NoHeader jannoAsBytestringwithNA :: Either String (V.Vector (V.Vector Bch.ByteString)) of + case Csv.decodeWith decodingOptions Csv.NoHeader jannoAsBytestring :: Either String (V.Vector (V.Vector Bch.ByteString)) of Left _ -> error "internal error, please report" Right x -> do let janno = V.toList $ V.map V.toList x diff --git a/src/Poseidon/SequencingSource.hs b/src/Poseidon/SequencingSource.hs index 918595b3..6a3ce78f 100644 --- a/src/Poseidon/SequencingSource.hs +++ b/src/Poseidon/SequencingSource.hs @@ -7,7 +7,7 @@ module Poseidon.SequencingSource where import Poseidon.Janno (CsvNamedRecord (..), JannoStringList, ListColumn (..), decodingOptions, encodingOptions, - explicitNA, filterLookup, + filterLookup, filterLookupOptional, getCsvNR, parseCsvParseError, removeUselessSuffix, @@ -385,8 +385,7 @@ instance Csv.ToNamedRecord SeqSourceRow where writeSeqSourceFile :: FilePath -> SeqSourceRows -> IO () writeSeqSourceFile path (SeqSourceRows rows) = do let seqSourceAsBytestring = Csv.encodeByNameWith encodingOptions makeHeaderWithAdditionalColumns rows - let seqSourceAsBytestringwithNA = explicitNA seqSourceAsBytestring - Bch.writeFile path seqSourceAsBytestringwithNA + Bch.writeFile path seqSourceAsBytestring where makeHeaderWithAdditionalColumns :: Csv.Header makeHeaderWithAdditionalColumns = From f2cc266fd2e1874c1e233d560236b489a1a91c9a Mon Sep 17 00:00:00 2001 From: Clemens Schmid Date: Thu, 9 Jan 2025 17:09:58 +0100 Subject: [PATCH 10/10] further improving this mechanism and adding it for ssf files --- src/Poseidon/Janno.hs | 2 +- src/Poseidon/SequencingSource.hs | 4 +- .../GoldenTestCheckSumFile.txt | 10 ++-- .../GoldenTestData/chronicle/chronicle2.yml | 14 +++--- .../GoldenTestData/list/list3 | 20 ++++---- .../GoldenTestData/list/list4 | 20 ++++---- .../GoldenTestData/list/list6 | 20 ++++---- .../GoldenTestData/listRemote/listRemote3 | 2 +- .../GoldenTestData/listRemote/listRemote5 | 50 +++++++++---------- 9 files changed, 71 insertions(+), 71 deletions(-) diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index daa3b752..827b0a8d 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -284,7 +284,7 @@ cleanInput (Just rawInputBS) = transNA rawInputBS transNA x = Just x explicitNA :: Csv.NamedRecord -> Csv.NamedRecord -explicitNA = HM.map (\x -> if x == "" then "n/a" else x) +explicitNA = HM.map (\x -> if Bchs.null x then "n/a" else x) instance Csv.ToNamedRecord JannoRow where toNamedRecord j = explicitNA $ Csv.namedRecord [ diff --git a/src/Poseidon/SequencingSource.hs b/src/Poseidon/SequencingSource.hs index 6a3ce78f..0c52887c 100644 --- a/src/Poseidon/SequencingSource.hs +++ b/src/Poseidon/SequencingSource.hs @@ -7,7 +7,7 @@ module Poseidon.SequencingSource where import Poseidon.Janno (CsvNamedRecord (..), JannoStringList, ListColumn (..), decodingOptions, encodingOptions, - filterLookup, + explicitNA, filterLookup, filterLookupOptional, getCsvNR, parseCsvParseError, removeUselessSuffix, @@ -355,7 +355,7 @@ instance Csv.FromNamedRecord SeqSourceRow where <*> pure (CsvNamedRecord (m `HM.difference` seqSourceRefHashMap)) instance Csv.ToNamedRecord SeqSourceRow where - toNamedRecord s = Csv.namedRecord [ + toNamedRecord s = explicitNA $ Csv.namedRecord [ "poseidon_IDs" Csv..= sPoseidonID s , "udg" Csv..= sUDG s , "library_built" Csv..= sLibraryBuilt s diff --git a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt index 6628f966..df9c3837 100644 --- a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt +++ b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt @@ -22,10 +22,10 @@ d41d8cd98f00b204e9800998ecf8427e validate validate/validate8 d41d8cd98f00b204e9800998ecf8427e validate validate/validate9 bc636b9c03ea9359acd254a9911e5af3 list list/list1 b18847f5498ae55882689b75916fdf64 list list/list2 -63ef5f277f6f29163192382234211224 list list/list3 -1c1f24de305405ece44393d378c0e15a list list/list4 +d6eafec9087c88ed6a95e4a22f1f306c list list/list3 +27c6118de70743426f8d8157ddd0f1c7 list list/list4 bc636b9c03ea9359acd254a9911e5af3 list list/list5 -ad5590b0ad65e64d6b2c8d874571c9f8 list list/list6 +b69c17ad4893e4e9bdcb767a229eaccb list list/list6 b197fb8dd883c7469a4791e4a677f1c0 summarise summarise/summarise1 d9e4b3f15d4e129a365d2064198d95b6 summarise summarise/summarise2 a1186fdad9ed555dff4dd61dc9838645 survey survey/survey1 @@ -144,9 +144,9 @@ b43da4d5734371c0648553120f812466 fetch fetch/multi_packages_2/Lamnidis_2018-1.0. 8d57ce1a1ab28c0d8a5f391dd790a59c fetch fetch/multi_packages_2/Lamnidis_2018-1.0.1/POSEIDON.yml 1d2a588b88e6d1017147c01f19d0b878 listRemote listRemote/listRemote1 0ddad9ea097bca0253e0c3c6157efa68 listRemote listRemote/listRemote2 -b2286cf9af7c6c8757b8109a1f58e2d9 listRemote listRemote/listRemote3 +705ecf31acfb9f21bfdc5bf4e77c10cd listRemote listRemote/listRemote3 0433b2a80ee5a2eb5bf8c6404130e562 listRemote listRemote/listRemote4 -8a13e5b31acabca6839100f411c38453 listRemote listRemote/listRemote5 +06eb810bcba832e75d72f10800ea7774 listRemote listRemote/listRemote5 eb610918796e03da3b0035655e9f8faa jannocoalesce jannocoalesce/target1.janno df34d0542c0a94cf9556619bff2e301d jannocoalesce jannocoalesce/target2.janno cc76b2bf0ad6637ea6502fdefcca3508 jannocoalesce jannocoalesce/target3.janno \ No newline at end of file diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml index 9eaa28a6..585858ec 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml @@ -1,29 +1,29 @@ title: Chronicle title description: Chronicle description chronicleVersion: 0.2.0 -lastModified: 2025-01-03 +lastModified: 2025-01-09 packages: - title: Lamnidis_2018 version: 1.0.0 - commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 + commit: 4262db441fc73a3a0fabe7165ba13261f0a994f2 path: Lamnidis_2018 - title: Lamnidis_2018 version: 1.0.1 - commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 + commit: 4262db441fc73a3a0fabe7165ba13261f0a994f2 path: Lamnidis_2018_newVersion - title: Schiffels version: 1.1.1 - commit: cf3deedf474ef0a651fdcfe5e92085e7810cb816 + commit: 88e86f51139053f19d55f453b9fbcaae205d37a3 path: Schiffels - title: Schiffels_2016 version: 1.0.1 - commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 + commit: 4262db441fc73a3a0fabe7165ba13261f0a994f2 path: Schiffels_2016 - title: Schmid_2028 version: 1.0.0 - commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 + commit: 4262db441fc73a3a0fabe7165ba13261f0a994f2 path: Schmid_2028 - title: Wang_2020 version: 0.1.0 - commit: e59bbf7865a783e78979e2bf9f757a8aa9020656 + commit: 4262db441fc73a3a0fabe7165ba13261f0a994f2 path: Wang_2020 diff --git a/test/PoseidonGoldenTests/GoldenTestData/list/list3 b/test/PoseidonGoldenTests/GoldenTestData/list/list3 index b4289eec..1410b8b4 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/list/list3 +++ b/test/PoseidonGoldenTests/GoldenTestData/list/list3 @@ -1,14 +1,14 @@ .------------.-------.----------------.----------------.-----------.---------.---------. | Individual | Group | Package | PackageVersion | Is Latest | Country | Nr_SNPs | :============:=======:================:================:===========:=========:=========: -| XXX001 | POP1 | Schiffels_2016 | 1.0.1 | True | | | -| XXX002 | POP2 | Schiffels_2016 | 1.0.1 | True | | | -| XXX003 | POP1 | Schiffels_2016 | 1.0.1 | True | | | -| XXX004 | POP2 | Schiffels_2016 | 1.0.1 | True | | | -| XXX005 | POP2 | Schiffels_2016 | 1.0.1 | True | | | -| XXX006 | POP2 | Schiffels_2016 | 1.0.1 | True | | | -| XXX007 | POP1 | Schiffels_2016 | 1.0.1 | True | | | -| XXX008 | POP3 | Schiffels_2016 | 1.0.1 | True | | | -| XXX009 | POP1 | Schiffels_2016 | 1.0.1 | True | | | -| XXX010 | POP3 | Schiffels_2016 | 1.0.1 | True | | | +| XXX001 | POP1 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX002 | POP2 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX003 | POP1 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX004 | POP2 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX005 | POP2 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX006 | POP2 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX007 | POP1 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX008 | POP3 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX009 | POP1 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | +| XXX010 | POP3 | Schiffels_2016 | 1.0.1 | True | n/a | n/a | '------------'-------'----------------'----------------'-----------'---------'---------' diff --git a/test/PoseidonGoldenTests/GoldenTestData/list/list4 b/test/PoseidonGoldenTests/GoldenTestData/list/list4 index 51ab7cb0..c3e3e8e8 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/list/list4 +++ b/test/PoseidonGoldenTests/GoldenTestData/list/list4 @@ -1,11 +1,11 @@ Individual Group Package PackageVersion Is Latest Country Nr_SNPs -XXX001 POP1 Schiffels_2016 1.0.1 True -XXX002 POP2 Schiffels_2016 1.0.1 True -XXX003 POP1 Schiffels_2016 1.0.1 True -XXX004 POP2 Schiffels_2016 1.0.1 True -XXX005 POP2 Schiffels_2016 1.0.1 True -XXX006 POP2 Schiffels_2016 1.0.1 True -XXX007 POP1 Schiffels_2016 1.0.1 True -XXX008 POP3 Schiffels_2016 1.0.1 True -XXX009 POP1 Schiffels_2016 1.0.1 True -XXX010 POP3 Schiffels_2016 1.0.1 True +XXX001 POP1 Schiffels_2016 1.0.1 True n/a n/a +XXX002 POP2 Schiffels_2016 1.0.1 True n/a n/a +XXX003 POP1 Schiffels_2016 1.0.1 True n/a n/a +XXX004 POP2 Schiffels_2016 1.0.1 True n/a n/a +XXX005 POP2 Schiffels_2016 1.0.1 True n/a n/a +XXX006 POP2 Schiffels_2016 1.0.1 True n/a n/a +XXX007 POP1 Schiffels_2016 1.0.1 True n/a n/a +XXX008 POP3 Schiffels_2016 1.0.1 True n/a n/a +XXX009 POP1 Schiffels_2016 1.0.1 True n/a n/a +XXX010 POP3 Schiffels_2016 1.0.1 True n/a n/a diff --git a/test/PoseidonGoldenTests/GoldenTestData/list/list6 b/test/PoseidonGoldenTests/GoldenTestData/list/list6 index dc63d84c..b38f46bc 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/list/list6 +++ b/test/PoseidonGoldenTests/GoldenTestData/list/list6 @@ -1,14 +1,14 @@ .------------.-------.----------------.----------------.-----------.-------------.-----------------.-------------.-----------------.---------------.---------------.---------------.---------.-------------.----------.------.----------.-----------.-----------.----------------.-------------------.-----------------------.------------------.-------------------.-----------------.-----------.---------------.--------------.---------------.--------------.---------------.--------------.-----.---------------.-----------------.-------------------------------.------------.---------.-------------------------.--------.---------------.-------------------.--------------------.--------------------.------------------------------.-----------------.------------------------------------.------.----------. | Individual | Group | Package | PackageVersion | Is Latest | Genetic_Sex | Alternative_IDs | Relation_To | Relation_Degree | Relation_Type | Relation_Note | Collection_ID | Country | Country_ISO | Location | Site | Latitude | Longitude | Date_Type | Date_C14_Labnr | Date_C14_Uncal_BP | Date_C14_Uncal_BP_Err | Date_BC_AD_Start | Date_BC_AD_Median | Date_BC_AD_Stop | Date_Note | MT_Haplogroup | Y_Haplogroup | Source_Tissue | Nr_Libraries | Library_Names | Capture_Type | UDG | Library_Built | Genotype_Ploidy | Data_Preparation_Pipeline_URL | Endogenous | Nr_SNPs | Coverage_on_Target_SNPs | Damage | Contamination | Contamination_Err | Contamination_Meas | Contamination_Note | Genetic_Source_Accession_IDs | Primary_Contact | Publication | Note | Keywords | :============:=======:================:================:===========:=============:=================:=============:=================:===============:===============:===============:=========:=============:==========:======:==========:===========:===========:================:===================:=======================:==================:===================:=================:===========:===============:==============:===============:==============:===============:==============:=====:===============:=================:===============================:============:=========:=========================:========:===============:===================:====================:====================:==============================:=================:====================================:======:==========: -| XXX001 | POP1 | Schiffels_2016 | 1.0.1 | True | M | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016 | | | -| XXX002 | POP2 | Schiffels_2016 | 1.0.1 | True | F | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016 | | | -| XXX003 | POP1 | Schiffels_2016 | 1.0.1 | True | M | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016 | | | -| XXX004 | POP2 | Schiffels_2016 | 1.0.1 | True | F | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016 | | | -| XXX005 | POP2 | Schiffels_2016 | 1.0.1 | True | M | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016;TestPaper1 | | | -| XXX006 | POP2 | Schiffels_2016 | 1.0.1 | True | F | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016;TestPaper1 | | | -| XXX007 | POP1 | Schiffels_2016 | 1.0.1 | True | M | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016;TestBook1 | | | -| XXX008 | POP3 | Schiffels_2016 | 1.0.1 | True | F | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016;TestBook1 | | | -| XXX009 | POP1 | Schiffels_2016 | 1.0.1 | True | F | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016;TestPaper1;TestBook1 | | | -| XXX010 | POP3 | Schiffels_2016 | 1.0.1 | True | M | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Schiffels2016;TestPaper1;TestBook1 | | | +| XXX001 | POP1 | Schiffels_2016 | 1.0.1 | True | M | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016 | n/a | n/a | +| XXX002 | POP2 | Schiffels_2016 | 1.0.1 | True | F | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016 | n/a | n/a | +| XXX003 | POP1 | Schiffels_2016 | 1.0.1 | True | M | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016 | n/a | n/a | +| XXX004 | POP2 | Schiffels_2016 | 1.0.1 | True | F | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016 | n/a | n/a | +| XXX005 | POP2 | Schiffels_2016 | 1.0.1 | True | M | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016;TestPaper1 | n/a | n/a | +| XXX006 | POP2 | Schiffels_2016 | 1.0.1 | True | F | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016;TestPaper1 | n/a | n/a | +| XXX007 | POP1 | Schiffels_2016 | 1.0.1 | True | M | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016;TestBook1 | n/a | n/a | +| XXX008 | POP3 | Schiffels_2016 | 1.0.1 | True | F | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016;TestBook1 | n/a | n/a | +| XXX009 | POP1 | Schiffels_2016 | 1.0.1 | True | F | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016;TestPaper1;TestBook1 | n/a | n/a | +| XXX010 | POP3 | Schiffels_2016 | 1.0.1 | True | M | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | n/a | Schiffels2016;TestPaper1;TestBook1 | n/a | n/a | '------------'-------'----------------'----------------'-----------'-------------'-----------------'-------------'-----------------'---------------'---------------'---------------'---------'-------------'----------'------'----------'-----------'-----------'----------------'-------------------'-----------------------'------------------'-------------------'-----------------'-----------'---------------'--------------'---------------'--------------'---------------'--------------'-----'---------------'-----------------'-------------------------------'------------'---------'-------------------------'--------'---------------'-------------------'--------------------'--------------------'------------------------------'-----------------'------------------------------------'------'----------' diff --git a/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote3 b/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote3 index 49470cb1..661fd500 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote3 +++ b/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote3 @@ -19,7 +19,7 @@ XXX017 POP1 Lamnidis_2018 1.0.1 True Lamnidis2018 XXX018 POP3 Lamnidis_2018 1.0.1 True Lamnidis2018 XXX019 POP1 Lamnidis_2018 1.0.1 True Lamnidis2018 XXX020 POP3 Lamnidis_2018 1.0.1 True Lamnidis2018 -SAMPLE0 1 Wang_2020 0.1.0 True +SAMPLE0 1 Wang_2020 0.1.0 True n/a SAMPLE1 2 Wang_2020 0.1.0 True TestPaper1 SAMPLE2 3 Wang_2020 0.1.0 True Wang2020;TestPaper1 SAMPLE3 4 Wang_2020 0.1.0 True Wang2020;TestBook2 diff --git a/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote5 b/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote5 index 66f95a8e..4a53c1a2 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote5 +++ b/test/PoseidonGoldenTests/GoldenTestData/listRemote/listRemote5 @@ -1,26 +1,26 @@ Individual Group Package PackageVersion Is Latest Genetic_Sex Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue Nr_Libraries Library_Names Capture_Type UDG Library_Built Genotype_Ploidy Data_Preparation_Pipeline_URL Endogenous Nr_SNPs Coverage_on_Target_SNPs Damage Contamination Contamination_Err Contamination_Meas Contamination_Note Genetic_Source_Accession_IDs Primary_Contact Publication Note Keywords -XXX011 POP1 Lamnidis_2018 1.0.0 False M Lamnidis2018 -XXX012 POP2 Lamnidis_2018 1.0.0 False F Lamnidis2018 -XXX013 POP1 Lamnidis_2018 1.0.0 False M Lamnidis2018 -XXX014 POP2 Lamnidis_2018 1.0.0 False F Lamnidis2018 -XXX015 POP2 Lamnidis_2018 1.0.0 False M Lamnidis2018 -XXX016 POP2 Lamnidis_2018 1.0.0 False F Lamnidis2018 -XXX017 POP1 Lamnidis_2018 1.0.0 False M Lamnidis2018 -XXX018 POP3 Lamnidis_2018 1.0.0 False F Lamnidis2018 -XXX019 POP1 Lamnidis_2018 1.0.0 False F Lamnidis2018 -XXX099 POP3 Lamnidis_2018 1.0.0 False M Lamnidis2018 -XXX011 POP1 Lamnidis_2018 1.0.1 True M Lamnidis2018 -XXX012 POP2 Lamnidis_2018 1.0.1 True F Lamnidis2018 -XXX013 POP1 Lamnidis_2018 1.0.1 True M Lamnidis2018 -XXX014 POP2 Lamnidis_2018 1.0.1 True F Lamnidis2018 -XXX015 POP2 Lamnidis_2018 1.0.1 True M Lamnidis2018 -XXX016 POP2 Lamnidis_2018 1.0.1 True F Lamnidis2018 -XXX017 POP1 Lamnidis_2018 1.0.1 True M Lamnidis2018 -XXX018 POP3 Lamnidis_2018 1.0.1 True F Lamnidis2018 -XXX019 POP1 Lamnidis_2018 1.0.1 True F Lamnidis2018 -XXX020 POP3 Lamnidis_2018 1.0.1 True M Lamnidis2018 -SAMPLE0 1 Wang_2020 0.1.0 True F -SAMPLE1 2 Wang_2020 0.1.0 True M TestPaper1 -SAMPLE2 3 Wang_2020 0.1.0 True F Wang2020;TestPaper1 -SAMPLE3 4 Wang_2020 0.1.0 True M Wang2020;TestBook2 -SAMPLE4 5 Wang_2020 0.1.0 True F Wang2020;TestPaper1;TestBook2 +XXX011 POP1 Lamnidis_2018 1.0.0 False M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX012 POP2 Lamnidis_2018 1.0.0 False F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX013 POP1 Lamnidis_2018 1.0.0 False M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX014 POP2 Lamnidis_2018 1.0.0 False F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX015 POP2 Lamnidis_2018 1.0.0 False M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX016 POP2 Lamnidis_2018 1.0.0 False F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX017 POP1 Lamnidis_2018 1.0.0 False M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX018 POP3 Lamnidis_2018 1.0.0 False F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX019 POP1 Lamnidis_2018 1.0.0 False F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX099 POP3 Lamnidis_2018 1.0.0 False M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX011 POP1 Lamnidis_2018 1.0.1 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX012 POP2 Lamnidis_2018 1.0.1 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX013 POP1 Lamnidis_2018 1.0.1 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX014 POP2 Lamnidis_2018 1.0.1 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX015 POP2 Lamnidis_2018 1.0.1 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX016 POP2 Lamnidis_2018 1.0.1 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX017 POP1 Lamnidis_2018 1.0.1 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX018 POP3 Lamnidis_2018 1.0.1 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX019 POP1 Lamnidis_2018 1.0.1 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +XXX020 POP3 Lamnidis_2018 1.0.1 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Lamnidis2018 n/a n/a +SAMPLE0 1 Wang_2020 0.1.0 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a +SAMPLE1 2 Wang_2020 0.1.0 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a TestPaper1 n/a n/a +SAMPLE2 3 Wang_2020 0.1.0 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Wang2020;TestPaper1 n/a n/a +SAMPLE3 4 Wang_2020 0.1.0 True M n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Wang2020;TestBook2 n/a n/a +SAMPLE4 5 Wang_2020 0.1.0 True F n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a Wang2020;TestPaper1;TestBook2 n/a n/a