diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..dfd0e30 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +# Set update schedule for GitHub Actions + +version: 2 +updates: + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions every week + interval: "weekly" diff --git a/CHANGELOG.md b/CHANGELOG.md index 4799bf4..5519d75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,49 +1,66 @@ -- V 0.2.1.2: Maintenance: Switched to a newer compiler/resolver version, lifted some dependency restrictions, ran stylish-haskell on the entire codebase, updated the github actions, deprecated the haddock documentation for the dev version on GitHub -- V 0.2.1.1: Lifted some restrictions regarding the upper version bounds of dependencies -- V 0.2.1.0: Added a mechanism to detect terminal encoding and fall back on a simpler CLI plot if it is not UTF-8 -- V 0.2.0.1: Brought sample names back to default CLI output -- V 0.2.0.0: Added sum (and product) calibration and made the necessary changes to various interfaces (including CLI) to make this functionality accessible -- V 0.1.2.0: Added simple summary data (CalRangeSummary with calibrated median age + begin and end of 1- and 2-sigma ranges) to CalC14 and the cli output and plot. The latter got refactored and enhanced in the process. HDRs are now "ordered", so _hdrstart actually stores the older and _hdrstop the younger date -- V 0.1.1.0: Complete rewrite of the cli output handling to avoid a memory leak -- V 0.1.0.0: Switch to PVP versioning (https://pvp.haskell.org/) -- V 0.24.4: Removed big dependencies bytestring and statistics -- V 0.24.3: Multiple changes in .cabal to make cabal check happy -- V 0.24.2: Found and fixed another severe bug in renderCalCurve -- V 0.24.1: Fixed a serious bug in renderCalCurveMatrix -- V 0.24.0: Introduced more precise data types to distinguish years BP and years BC/AD -- V 0.23.1: Small changes to the instances of some general types -- V 0.23.0: Renamed multiple functions to make the naming of operations for parsing, reading, from-file reading, rendering and writing consistent across data types -- V 0.22.0: Changed the interface of the important calibrateDates function with a new config data type CalibrateDatesConf -- V 0.21.3: Refactored the calibration curve interpolation -- V 0.21.2: Introduced doctest and added some tiny examples/tests to try it out -- V 0.21.1: Split up the calibration module for better readability -- V 0.21.0: Added a neat CLI density plot for calibrated dates -- V 0.20.2: Some performance improvements for the calibration of large numbers of dates -- V 0.20.1: Better (parsing) error handling -- V 0.20.0: Added an option --allowOutside to allow for calibrations to run outside the range of the calibration curve -- V 0.19.0: Added functionality to filter out dates outside of the range of the calibration curve and report an error in this case -- V 0.18.0: Implemented calibration with a StudentT distribution to mimic Bchron and established that as the new default. Reimplemented the --method option of the CLI tool to reflect that change -- V 0.17.0: Changed argument order in CalCurve data type to adjust to the order in .14C files -- V 0.16.0: Refactoring in the library to simplify and clarify the interface -- V 0.15.0: Added another calibration algorithm (following the implementation by Andrew Parnell in Bchron) and a method switch for the CLI -- V 0.14.0: Introduced strictness, which brought a significant increase in performance. See the discussion here: https://old.reddit.com/r/haskell/comments/picjy6/how_could_i_improve_the_performance_of_my/ -- V 0.13.0: Major rewrite with the vector library - includes multiple bugfixes, but is surprisingly slow -- V 0.12.0: Renamed some core functions -- V 0.11.0: Made calibration curve interpolation optional and turned it off by default -- V 0.10.0: Simplified CLI interface by dropping the "calibrate" subcommand (currycarbon is sufficient now) and by repurposing -q from --quickOut to --quiet -- V 0.9.0: Made --hdrFile output a lot more machine-readable -- V 0.8.0: Added option --calibrationCurveFile to calibrate with different calibration curves -- V 0.7.2: More documentation, small changes in code layout and renamed CLI module that provides runCalibrate -- V 0.7.1: Added type documentation with haddock and replaced the existing types with record types -- V 0.7.0: Changed the date input interface once more -- V 0.6.0: Changed the date input interface, because parenthesis can be part of valid lab numbers -- V 0.5.2: Fixed parallel evalutation (deepseq forced memory-intensive, non-lazy behaviour) -- V 0.5.1: Added github release action (copied from poseidon-hs) -- V 0.5.0: Added file input for dates to calibrate -- V 0.4.0: Made output calibrated dates negative numbers for BC and positive for AD - and adjusted HDR printing accordingly -- V 0.3.2: Some optimisation -- V 0.3.1: Added automatic filling of unknown sample names -- V 0.3.0: Simplified interface -- V 0.2.1: Removed ascii plot functionality -- V 0.2.0: Added parallel processing for the main calibration operation -- V 0.1.0: First basically working version +- V 0.3.0.0: Major update with multiple breaking changes and new features: + - Added a new mechanism to draw random age samples from a CalPDF (`sampleAgesFromCalPDF :: AgeSamplingConf -> CalPDF -> RandomAgeSample`). This is available from the command line with the options `samplesFile`, `--seed`, and `-n`/`--nrSamples`. + - Added a new concept to the `CalExpr` data type: Age ranges with uniform probability for each year in the range (`TimeWindowBP` and `TimeWindowBCAD`). + - Reworked the encoding and evaluation mechanism for calibration expressions: + - Introduced the `NamedCalExpr` as a wrapper around `CalExpr` with an identifier, and then adjusted the ID generation for `CalPDF`s to prioritize this identifier. + - Reworked the CLI DSL to support a standardized configuration language syntax implemented in a new module `ParserHelpers.hs`. This introduces a set of flexible functions (`calExpr()`, `uncalC14()`, `rangeBP()`, `rangeBCAD()`, `sum()` and `product()`) which generally complement the previously available syntax and operators. They are preserved as syntactic sugar for the new, more standardized syntax. Unfortunately this change breaks some expressions that were valid before (e.g. `"3000,30 + 3020,50"`). They now require additional parentheses to pass (eo e.g. `"(3000,30) + (3020,50)"`). + - Added some unit tests to cover the increasingly complex DSL. + - Changed the output files from .csv to .tsv and to a more meaningful and consistent set of column names. + - Slightly adjusted the rendering of the pretty, human-focussed command line output. + - Updated and improved the command line documentation. + - Renamed some CLI arguments: + - `--calibrationCurveFile` -> `--calCurveFile` + - `--calCurveSegmentFile` -> `--calCurveSegFile` + - `--calCurveMatrixFile` -> `--calCurveMatFile` + - Changed the CLI behaviour with `--calCurveSegFile` and `--calCurveMatFile`: currycarbon now fails with these options if the first sample is not a single, uncalibrated radiocarbon date (so `uncalC14()`). + - Added a simple golden test system with some basic calls to the `currycarbon` CLI tool. + - Switched to a new GHC version (v9.4.7) and stackage resolver version (lts-21.17). +- V 0.2.1.2: Maintenance: Switched to a newer compiler/resolver version, lifted some dependency restrictions, ran stylish-haskell on the entire codebase, updated the github actions, deprecated the haddock documentation for the dev version on GitHub. +- V 0.2.1.1: Lifted some restrictions regarding the upper version bounds of dependencies. +- V 0.2.1.0: Added a mechanism to detect terminal encoding and fall back on a simpler CLI plot if it is not UTF-8. +- V 0.2.0.1: Brought sample names back to default CLI output. +- V 0.2.0.0: Added sum (and product) calibration and made the necessary changes to various interfaces (including CLI) to make this functionality accessible. +- V 0.1.2.0: Added simple summary data (`CalRangeSummary` with calibrated median age + begin and end of 1- and 2-sigma ranges) to `CalC14` and the CLI output and plot. The latter got refactored and enhanced in the process. `HDR`s are now "ordered", so `_hdrstart` actually stores the older and `_hdrstop` the younger date. +- V 0.1.1.0: Complete rewrite of the CLI output handling to avoid a memory leak. +- V 0.1.0.0: Switch to PVP versioning (https://pvp.haskell.org/). +- V 0.24.4: Removed big dependencies bytestring and statistics. +- V 0.24.3: Multiple changes in .cabal to make cabal check happy. +- V 0.24.2: Found and fixed another severe bug in `renderCalCurve`. +- V 0.24.1: Fixed a serious bug in `renderCalCurveMatrix`. +- V 0.24.0: Introduced more precise data types to distinguish years BP and years BC/AD. +- V 0.23.1: Small changes to the instances of some general types. +- V 0.23.0: Renamed multiple functions to make the naming of operations for parsing, reading, from-file reading, rendering and writing consistent across data types. +- V 0.22.0: Changed the interface of the important `calibrateDates` function with a new config data type `CalibrateDatesConf`. +- V 0.21.3: Refactored the calibration curve interpolation. +- V 0.21.2: Introduced doctest and added some tiny examples/tests to try it out. +- V 0.21.1: Split up the calibration module for better readability. +- V 0.21.0: Added a neat CLI density plot for calibrated dates. +- V 0.20.2: Some performance improvements for the calibration of large numbers of dates. +- V 0.20.1: Better (parsing) error handling. +- V 0.20.0: Added an option `--allowOutside` to allow for calibrations to run outside the range of the calibration curve. +- V 0.19.0: Added functionality to filter out dates outside of the range of the calibration curve and report an error in this case. +- V 0.18.0: Implemented calibration with a StudentT distribution to mimic Bchron and established that as the new default. Reimplemented the `--method` option of the CLI tool to reflect that change. +- V 0.17.0: Changed argument order in the `CalCurve` data type to adjust to the order in `.14C` files. +- V 0.16.0: Refactoring in the library to simplify and clarify the interface. +- V 0.15.0: Added another calibration algorithm (following the implementation by Andrew Parnell in Bchron) and a method switch for the CLI. +- V 0.14.0: Introduced strictness, which brought a significant increase in performance. See the discussion here: https://old.reddit.com/r/haskell/comments/picjy6/how_could_i_improve_the_performance_of_my/. +- V 0.13.0: Major rewrite with the vector library - includes multiple bugfixes, but is surprisingly slow. +- V 0.12.0: Renamed some core functions. +- V 0.11.0: Made calibration curve interpolation optional and turned it off by default. +- V 0.10.0: Simplified CLI interface by dropping the `calibrate` subcommand (`currycarbon` is sufficient now) and by repurposing `-q` from `--quickOut` to `--quiet`. +- V 0.9.0: Made `--hdrFile` output a lot more machine-readable. +- V 0.8.0: Added option `--calibrationCurveFile` to calibrate with different calibration curves. +- V 0.7.2: More documentation, small changes in code layout and renamed CLI module that provides `runCalibrate`. +- V 0.7.1: Added type documentation with haddock and replaced the existing types with record types. +- V 0.7.0: Changed the date input interface once more. +- V 0.6.0: Changed the date input interface, because parenthesis can be part of valid lab numbers. +- V 0.5.2: Fixed parallel evaluation (deepseq forced memory-intensive, non-lazy behaviour). +- V 0.5.1: Added github release action (copied from poseidon-hs). +- V 0.5.0: Added file input for dates to calibrate. +- V 0.4.0: Made output calibrated dates negative numbers for BC and positive for AD - and adjusted HDR printing accordingly. +- V 0.3.2: Some optimisation. +- V 0.3.1: Added automatic filling of unknown sample names. +- V 0.3.0: Simplified interface. +- V 0.2.1: Removed ascii plot functionality. +- V 0.2.0: Added parallel processing for the main calibration operation. +- V 0.1.0: First basically working version. diff --git a/README.md b/README.md index 1185bf9..788dc94 100644 --- a/README.md +++ b/README.md @@ -28,86 +28,126 @@ chmod +x currycarbon-Linux ``` ``` -currycarbon v0.2.1.0 (UTF-8) +currycarbon v0.3.0.0 (UTF-8) Method: Bchron {distribution = StudentTDist {ndf = 100.0}} Curve: IntCal20 Calibrating... -DATE: Sample1:4990±30BP +CalEXPR: [1] Sample1:4990±30BP Calibrated: 3936BC >> 3794BC > 3757BC < 3662BC << 3654BC 1-sigma: 3794-3707BC, 3666-3662BC 2-sigma: 3936-3874BC, 3804-3697BC, 3684-3654BC - ▁▁▁ ▁▁▁▁ - ▁▁▒▒▒▁▁▁▁▒▒▒▒▁ - ▒▒▒▒▒▒▒▒▒▒▒▒▒▒ ▁ - ▁▁ ▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁ ▁▒ - ▁▁▒▒▁ ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ ▁▒▒▁ - ▁▁▁▁▁▒▒▒▒▒▁ ▁▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁▁▁▁▒▒▒▒▁ + ▁▁▁ ▁▁▁▁ + ▁▁▒▒▒▁▁▁▁▒▒▒▒▁ + ▒▒▒▒▒▒▒▒▒▒▒▒▒▒ ▁ + ▁▁ ▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁ ▁▒ + ▁▁▒▒▁ ▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ ▁▒▒▁ + ▁▁▁▁▁▒▒▒▒▒▁ ▁▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁▁▁▁▒▒▒▒▁ ▁▁▁▒▒▒▒▒▒▒▒▒▒▒▁▁▁▁▁▁▁▁▁▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁ -3950 ┄─────────┬───────────────┬────────────────┬─────────┄ -3640 - > > ^ < < - ──────────────── ─ - ─────────── ────────────────── ────── + > > ^ < < + ──────────────── ─ + ─────────── ────────────────── ────── Done. ``` ``` -Usage: currycarbon [--version] [DATE] [-i|--inputFile ARG] - [--calibrationCurveFile ARG] [--method ARG] [--allowOutside] - [--noInterpolation] [-q|--quiet] [--densityFile ARG] - [--hdrFile ARG] [--calCurveSegmentFile ARG] - [--calCurveMatrixFile ARG] +Usage: currycarbon [--version] [CalEXPRs] [-i|--inputFile FILE] + [--calCurveFile FILE] [--method DSL] [--allowOutside] + [--noInterpolation] [-q|--quiet] [--densityFile FILE] + [--hdrFile FILE] + [[--seed INT] (-n|--nrSamples INT) --samplesFile FILE] + [--calCurveSegFile FILE] [--calCurveMatFile FILE] + Intercept calibration of radiocarbon dates Available options: -h,--help Show this help text --version Show version - DATE A string with one or multiple uncalibrated dates of - the form ",," where is optional - (e.g. "S1,4000,50"). Multiple dates can be listed - separated by ";" (e.g. "S1,4000,50; 3000,25; - S3,1000,20"). To sum or multiply the post calibration - probability distributions, dates can be combined with - "+" or "*" (e.g. "4000,50 + 4100,100"). These - expressions can be combined arbitrarily. Parentheses - can be added to specify the order of operations (e.g. - "(4000,50 + 4100,100) * 3800,50") - -i,--inputFile ARG A file with a list of calibration expressions. - Formated just as DATE, but with a new line for each - input date. DATE and --inputFile can be combined and - you can provide multiple instances of --inputFile - --calibrationCurveFile ARG - Path to an calibration curve file in .14c format. The - calibration curve will be read and used for + CalEXPRs --- + A string to specify "calibration expressions", so + small chronological models for individual events. + These can include uncalibrated radiocarbon ages, + uniform age ranges and operations to combine the + resulting age probability distribution as sums or + products. + The expression language includes the following + functions: + + - calExpr(name = STRING, expr = EXPR) + - uncalC14(name = STRING, age = INT, sigma = INT) + - rangeBP(name = STRING, start = INT, stop = INT) + - rangeBCAD(name = STRING, start = INT, stop = INT) + - sum(a = EXPR, b = EXPR) + - product(a = EXPR, b = EXPR) + + The order of arguments is fixed, but the argument + names ' =' can be left out. The 'name' arguments + are optional. Some functions can be shortened with + syntactic sugar: + + - calExpr(STRING, EXPR) -> name: EXPR + - uncalC14(STRING, INT, INT) -> STRING,INT,INT + - sum(EXPR, EXPR) -> EXPR + EXPR + - product(EXPR, EXPR) -> EXPR * EXPR + + Parentheses '()' can be used to specify the + evaluation order within an expression. Multiple + expressions can be chained, separated by ';'. + + Examples: + 1. Calibrate a single radiocarbon date with a mean + age BP and a one sigma standard deviation: + "3000,30" or "uncalC14(age = 3000, sigma = 30)" + 2. Calibrate two radiocarbon dates and sum them: + "(3000,30) + (3100,40)" or + "sum(uncalC14(3000,30), uncalC14(3100,40))" + 3. Compile a complex, named expression: + "Ex3: ((3000,30) + (3100,40)) * rangeBP(3200,3000)" + --- + -i,--inputFile FILE A file with a list of calibration expressions. + Formatted just as CalEXPRs, but with a new line for + each input expression. CalEXPRs and --inputFile can + be combined and you can provide multiple instances of + --inputFile. Note that syntactic sugar allows to read + simple radiocarbon dates from a headless .csv file + with one sample per row: ,,. + --calCurveFile FILE Path to an calibration curve file in '.14c' format. + The calibration curve will be read and used for calibration. If no file is provided, currycarbon will - use the intcal20 curve. - --method ARG The calibration algorithm that should be used: - ",,". + use the 'intcal20' curve. + --method DSL The calibration algorithm that should be used: + ',,'. The default setting is equivalent to "Bchron,StudentT,100" which copies the algorithm - implemented in the Bchron R package. Alternatively we - implemented "MatrixMult", which comes without further - arguments. For the Bchron algorithm with a normal - distribution ("Bchron,Normal") the degrees of freedom - argument is not relevant + implemented in the Bchron R package. For the Bchron + algorithm with a normal distribution + ("Bchron,Normal") the degrees of freedom argument is + not relevant + Alternatively we implemented "MatrixMult", which + comes without further arguments. --allowOutside Allow calibrations to run outside the range of the - calibration curve - --noInterpolation Don't interpolate the calibration curve + calibration curve. + --noInterpolation Do not interpolate the calibration curve. -q,--quiet Suppress the printing of calibration results to the - command line - --densityFile ARG Path to an output file which stores output densities - per sample and calender year - --hdrFile ARG Path to an output file which stores the high - probability density regions for each sample - --calCurveSegmentFile ARG - Path to an output file which stores the relevant, + command line. + --densityFile FILE Path to an output file to store output densities per + CalEXPR and calender year. + --hdrFile FILE Path to an output file to store the high probability + density regions for each CalEXPR. + --seed INT Seed for the random number generator for age + sampling. The default causes currycarbon to fall back + to a random seed. (default: Nothing) + -n,--nrSamples INT Number of age samples to draw per CalEXPR. + --samplesFile FILE Path to an output file to store age samples for each + CalEXPR. + --calCurveSegFile FILE Path to an output file to store the relevant, interpolated calibration curve segment for the first - (!) input date in a long format. This option as well - as --calCurveMatrixFile are mostly meant for - debugging - --calCurveMatrixFile ARG Path to an output file which stores the relevant, + (!) input date. This option as well as + --calCurveMatFile are meant for debugging. + --calCurveMatFile FILE Path to an output file which stores the relevant, interpolated calibration curve segment for the first - (!) input date in a wide matrix format + (!) input date in a wide matrix format. ``` ### For developers who want to edit the code diff --git a/cabal.project b/cabal.project index 551b1c7..ebcdadc 100644 --- a/cabal.project +++ b/cabal.project @@ -1,2 +1,2 @@ packages: ./*.cabal -with-compiler: ghc-9.2.7 \ No newline at end of file +with-compiler: ghc-9.4.7 \ No newline at end of file diff --git a/currycarbon.cabal b/currycarbon.cabal index e4f829e..26fde57 100644 --- a/currycarbon.cabal +++ b/currycarbon.cabal @@ -1,5 +1,5 @@ name: currycarbon -version: 0.2.1.2 +version: 0.3.0.0 synopsis: A package for simple, fast radiocarbon calibration description: Radiocarbon calibration with the intercept method optimised for fast calibration of many dates. homepage: https://github.com/nevrome/currycarbon @@ -26,6 +26,7 @@ library Currycarbon.Calibration.Bchron Currycarbon.Calibration.Calibration Currycarbon.CLI.RunCalibrate + Currycarbon.ParserHelpers Currycarbon.Parsers Currycarbon.SumCalibration Currycarbon.Types @@ -38,6 +39,8 @@ library , parsec >= 3.1 && < 3.2 , vector >= 0.12 && < 0.14 , math-functions >= 0.3 && < 0.4 + , MonadRandom >= 0.6 && < 1 + , random > 1.2 && < 1.3 default-language: Haskell2010 @@ -57,3 +60,21 @@ executable currycarbon Haskell2010 ghc-options: -threaded -with-rtsopts=-N -optP-Wno-nonportable-include-path + +test-suite spec + hs-source-dirs: + test + main-is: + Spec.hs + type: + exitcode-stdio-1.0 + build-depends: + base >= 4.9 && < 5 + , currycarbon + , hspec + , process + other-modules: + ParserSpec, + GoldenSpec + default-language: + Haskell2010 \ No newline at end of file diff --git a/playground/test.R b/playground/test.R index 9ac376f..7c31428 100644 --- a/playground/test.R +++ b/playground/test.R @@ -15,61 +15,77 @@ run_currycarbon <- function(additional_commands = "") { run_currycarbon_calPDF <- function(additional_commands = "") { run_currycarbon(paste( - "--densityFile /tmp/currycarbonOutput.txt", + "--densityFile /tmp/currycarbonOutput.tsv -q", additional_commands )) - readr::read_csv( - "/tmp/currycarbonOutput.txt", + readr::read_tsv( + "/tmp/currycarbonOutput.tsv", col_types = readr::cols() ) } #### comparison with the Bchron R package #### -curry_bchron_studentT100 <- run_currycarbon_calPDF() -curry_matrixmult_default <- run_currycarbon_calPDF("--method MatrixMultiplication") -curry_bchron_normal <- run_currycarbon_calPDF("--method \"Bchron,Normal\"") -# --noInterpolation")) - +curry_bchron_studentT100 <- run_currycarbon_calPDF() |> + dplyr::rename(density_curry_bchron_studentT100 = density) +curry_matrixmult <- run_currycarbon_calPDF("--method MatrixMultiplication") |> + dplyr::rename(density_curry_matrixmult = density) +curry_bchron_normal <- run_currycarbon_calPDF("--method \"Bchron,Normal\"") |> + dplyr::rename(density_curry_bchron_normal = density) bchronRaw <- Bchron::BchronCalibrate( testdate[1], testdate[2], calCurves = 'intcal20' ) bchron <- tibble::tibble( - calBCAD = -bchronRaw$Date1$ageGrid + 1950, + yearBCAD = -bchronRaw$Date1$ageGrid + 1950, density_bchron = bchronRaw$Date1$densities ) -bchron |> - dplyr::full_join(curry_bchron_studentT100, by = "calBCAD") |> - dplyr::full_join(curry_bchron_normal, by = "calBCAD") |> - dplyr::full_join(curry_matrixmult_default, by = "calBCAD") |> +bchron |> + dplyr::full_join(curry_bchron_studentT100, by = "yearBCAD") |> + dplyr::full_join(curry_bchron_normal, by = "yearBCAD") |> + dplyr::full_join(curry_matrixmult, by = "yearBCAD") |> tidyr::pivot_longer( tidyselect::starts_with("dens"), names_to = "method" ) |> ggplot() + - geom_point( - aes(x = calBCAD, y = value, colour = method), - size = 1, alpha = 0.5 + geom_line( + aes(x = yearBCAD, y = value, colour = method), + linewidth = 1, alpha = 0.5 ) +#### confirm the reliability of the random age sampling #### + +run_currycarbon("--samplesFile /tmp/currySamples.tsv -n 10000") + +age_samples <- readr::read_tsv("/tmp/currySamples.tsv") + +year_count <- age_samples |> + dplyr::mutate(yearBCAD = round(yearBCAD, -1)) |> + dplyr::group_by(yearBCAD) |> + dplyr::summarise(n = dplyr::n()) + +year_count |> + ggplot() + + geom_bar(aes(x = yearBCAD, y = n), stat = "identity") + #### large test (for memory leaks) #### calpal <- c14bazAAR::get_calpal() calpal |> dplyr::select(c14age, c14std) |> dplyr::slice_head(n = 5000) |> readr::write_csv("/tmp/currycarbon_large_input_test.csv", col_names = F) -#currycarbon --inputFile /tmp/currycarbon_large_input_test.csv -q --densityFile /dev/null +system("currycarbon --inputFile /tmp/currycarbon_large_input_test.csv -q") #### cal curve #### run_currycarbon(paste( - "--calCurveMatrixFile /tmp/curryMatrix.csv", - "--calCurveSegmentFile /tmp/currySegment.csv" + "--calCurveMatFile /tmp/curryMatrix.tsv", + "--calCurveSegFile /tmp/currySegment.tsv" )) -cal_matrix <- readr::read_csv("/tmp/curryMatrix.csv") |> +cal_matrix <- readr::read_tsv("/tmp/curryMatrix.tsv") |> tidyr::gather(vars, count, -...1) |> dplyr::transmute( uncal = ...1, @@ -77,7 +93,7 @@ cal_matrix <- readr::read_csv("/tmp/curryMatrix.csv") |> val = count ) -cal_segment <- readr::read_csv("/tmp/currySegment.csv") +cal_segment <- readr::read_tsv("/tmp/currySegment.tsv") ggplot() + geom_raster( @@ -86,24 +102,24 @@ ggplot() + ) + geom_path( data = cal_segment, - mapping = aes(x = calBCAD, y = uncalBCAD), - color = "red", size = 0.5 + mapping = aes(x = calYearBCAD, y = uncalYearBCAD), + color = "red", linewidth = 0.5 ) + scale_y_reverse() + coord_fixed() #### sum and product cal #### -system('currycarbon "A,3000,20+B,2500,200+C,2800,70" --densityFile /tmp/currycarbonSumCalTest1.csv') +system('currycarbon "A,3000,20+B,2500,200+C,2800,70" --densityFile /tmp/currycarbonSumCalTest1.tsv') -sumCalTest1 <- readr::read_csv( - "/tmp/currycarbonSumCalTest1.csv", +sumCalTest1 <- readr::read_tsv( + "/tmp/currycarbonSumCalTest1.tsv", col_types = readr::cols() ) sumCalTest1 |> ggplot() + - geom_line(aes(x = calBCAD, y = density)) + geom_line(aes(x = yearBCAD, y = density)) # Oxcal: # @@ -114,16 +130,16 @@ sumCalTest1 |> # R_Date("C",2800,70); # }; -system('currycarbon "A,3000,30+B,3200,40*C,3300,30" --densityFile /tmp/currycarbonSumCalTest2.csv') +system('currycarbon "A,3000,30+B,3200,40*C,3300,30" --densityFile /tmp/currycarbonSumCalTest2.tsv') -sumCalTest2 <- readr::read_csv( - "/tmp/currycarbonSumCalTest2.csv", +sumCalTest2 <- readr::read_tsv( + "/tmp/currycarbonSumCalTest2.tsv", col_types = readr::cols() ) sumCalTest2 |> ggplot() + - geom_line(aes(x = calBCAD, y = density)) + geom_line(aes(x = yearBCAD, y = density)) # Oxcal: # @@ -136,3 +152,18 @@ sumCalTest2 |> # R_Date("C",3300,30); # }; # }; + +#### another test of the random age sampling #### + +system('currycarbon "A,3000,30+B,3200,40*C,3300,30" --samplesFile /tmp/currySamples.tsv -n 10000') + +age_samples <- readr::read_tsv("/tmp/currySamples.tsv") + +year_count <- age_samples |> + dplyr::mutate(yearBCAD = round(yearBCAD, -1)) |> + dplyr::group_by(yearBCAD) |> + dplyr::summarise(n = dplyr::n()) + +year_count |> + ggplot() + + geom_bar(aes(x = yearBCAD, y = n), stat = "identity") diff --git a/src-executables/Main-currycarbon.hs b/src-executables/Main-currycarbon.hs index 48b1a3b..49111d5 100644 --- a/src-executables/Main-currycarbon.hs +++ b/src-executables/Main-currycarbon.hs @@ -10,6 +10,7 @@ import Paths_currycarbon (version) import Control.Exception (catch) import Data.Version (showVersion) import qualified Options.Applicative as OP +import qualified Options.Applicative.Help as OH import System.Exit (exitFailure) import System.IO (hGetEncoding, hPutStrLn, stderr, stdout) @@ -56,8 +57,8 @@ optParser :: OP.Parser Options optParser = CmdCalibrate <$> calibrateOptParser calibrateOptParser :: OP.Parser CalibrateOptions -calibrateOptParser = CalibrateOptions <$> optParseCalExprString - <*> optParseCalExprFromFile +calibrateOptParser = CalibrateOptions <$> optParseNamedCalExprString + <*> optParseNamedCalExprFromFile <*> optParseCalCurveFromFile <*> optParseCalibrationMethod <*> optParseAllowOutside @@ -66,6 +67,7 @@ calibrateOptParser = CalibrateOptions <$> optParseCalExprString <*> pure "unknown" <*> optParseDensityFile <*> optParseHDRFile + <*> optParseAgeSamplingSettings <*> optParseCalCurveSegmentFile <*> optParseCalCurveMatrixFile @@ -75,97 +77,185 @@ calibrateOptParser = CalibrateOptions <$> optParseCalExprString -- -- These functions define and handle the CLI input arguments -optParseCalExprString :: OP.Parser [CalExpr] -optParseCalExprString = concat <$> OP.many (OP.argument (OP.eitherReader readCalExpr) ( - OP.metavar "DATE" <> - OP.help "A string with one or multiple uncalibrated dates of \ - \the form \",,\" \ - \where is optional (e.g. \"S1,4000,50\"). \ - \Multiple dates can be listed separated by \";\" (e.g. \"S1,4000,50; 3000,25; S3,1000,20\"). \ - \To sum or multiply the post calibration probability distributions, dates can be combined with \ - \\"+\" or \"*\" (e.g. \"4000,50 + 4100,100\"). \ - \These expressions can be combined arbitrarily. Parentheses can be added to specify the order \ - \of operations (e.g. \"(4000,50 + 4100,100) * 3800,50\")" +optParseNamedCalExprString :: OP.Parser [NamedCalExpr] +optParseNamedCalExprString = concat <$> OP.many (OP.argument (OP.eitherReader readNamedCalExprs) ( + OP.metavar "CalEXPRs" <> + OP.helpDoc ( Just ( + "---" + <> OH.hardline + <> s2d "A string to specify \"calibration expressions\", so small chronological \ + \models for individual events. These can include uncalibrated radiocarbon ages, \ + \uniform age ranges and operations to combine the resulting age probability \ + \distribution as sums or products." + <> OH.hardline <> + s2d "The expression language includes the following functions:" + <> OH.hardline + <> OH.hardline <> "- calExpr(id = STRING, expr = EXPR)" + <> OH.hardline <> "- uncalC14(id = STRING, yearBP = INT, sigma = INT)" + <> OH.hardline <> "- rangeBP(id = STRING, start = INT, stop = INT)" + <> OH.hardline <> "- rangeBCAD(id = STRING, start = INT, stop = INT)" + <> OH.hardline <> "- sum(a = EXPR, b = EXPR)" + <> OH.hardline <> "- product(a = EXPR, b = EXPR)" + <> OH.hardline + <> OH.hardline + <> s2d "The order of arguments is fixed, but the argument names ' =' \ + \can be left out. The 'id' arguments are optional. \ + \Some functions can be shortened with syntactic sugar:" + <> OH.hardline + <> OH.hardline <> "- calExpr(STRING, EXPR) -> id: EXPR" + <> OH.hardline <> "- uncalC14(STRING, INT, INT) -> STRING,INT,INT" + <> OH.hardline <> "- sum(EXPR, EXPR) -> EXPR + EXPR" + <> OH.hardline <> "- product(EXPR, EXPR) -> EXPR * EXPR" + <> OH.hardline + <> OH.hardline + <> s2d "Parentheses '()' can be used to specify the evaluation order within \ + \an expression. Multiple expressions can be chained, separated by ';'." + <> OH.hardline + <> OH.hardline + <> "Examples:" + <> OH.hardline <> s2d "1. Calibrate a single radiocarbon date with a mean age BP \ + \and a one sigma standard deviation:" + <> OH.hardline <> "\"3000,30\" or \"uncalC14(yearBP = 3000, sigma = 30)\"" + <> OH.hardline <> s2d "2. Calibrate two radiocarbon dates and sum them:" + <> OH.hardline <> "\"(3000,30) + (3100,40)\" or" + <> OH.hardline <> "\"sum(uncalC14(3000,30), uncalC14(3100,40))\"" + <> OH.hardline <> s2d "3. Compile a complex, named expression:" + <> OH.hardline <> "\"Ex3: ((3000,30) + (3100,40)) * rangeBP(3200,3000)\"" + <> OH.hardline + <> "---" )) + )) + +s2d :: String -> OH.Doc +s2d str = OH.fillSep $ map OH.pretty $ words str -optParseCalExprFromFile :: OP.Parser [FilePath] -optParseCalExprFromFile = OP.many (OP.strOption ( +optParseNamedCalExprFromFile :: OP.Parser [FilePath] +optParseNamedCalExprFromFile = OP.many (OP.strOption ( OP.long "inputFile" <> OP.short 'i' <> + OP.metavar "FILE" <> OP.help "A file with a list of calibration expressions. \ - \Formated just as DATE, but with a new line for each input date. \ - \DATE and --inputFile can be combined and you can provide multiple instances of --inputFile" + \Formatted just as CalEXPRs, but with a new line for each input expression. \ + \CalEXPRs and --inputFile can be combined and you can provide multiple \ + \instances of --inputFile. \ + \Note that syntactic sugar allows to read simple radiocarbon dates from \ + \a headless .csv file with one sample per row: \ + \,,." )) optParseCalCurveFromFile :: OP.Parser (Maybe FilePath) optParseCalCurveFromFile = OP.option (Just <$> OP.str) ( - OP.long "calibrationCurveFile" <> - OP.help "Path to an calibration curve file in .14c format. \ + OP.long "calCurveFile" <> + OP.metavar "FILE" <> + OP.help "Path to an calibration curve file in '.14c' format. \ \The calibration curve will be read and used for calibration. \ - \If no file is provided, currycarbon will use the intcal20 curve." <> + \If no file is provided, currycarbon will use the 'intcal20' curve." <> OP.value Nothing ) optParseCalibrationMethod :: OP.Parser CalibrationMethod optParseCalibrationMethod = OP.option (OP.eitherReader readCalibrationMethod) ( OP.long "method" <> - OP.help "The calibration algorithm that should be used: \ - \\",,\". \ - \The default setting is equivalent to \"Bchron,StudentT,100\" \ + OP.metavar "DSL" <> + OP.helpDoc ( Just ( + s2d "The calibration algorithm that should be used: \ + \',,'. " + <> OH.hardline <> + s2d "The default setting is equivalent to \"Bchron,StudentT,100\" \ \which copies the algorithm implemented in the Bchron R package. \ - \Alternatively we implemented \"MatrixMult\", which comes without further arguments. \ \For the Bchron algorithm with a normal distribution (\"Bchron,Normal\") \ - \the degrees of freedom argument is not relevant" <> + \the degrees of freedom argument is not relevant" + <> OH.hardline <> + s2d "Alternatively we implemented \"MatrixMult\", which comes without further \ + \arguments." + )) <> OP.value (Bchron $ StudentTDist 100) ) optParseAllowOutside :: OP.Parser (Bool) optParseAllowOutside = OP.switch ( OP.long "allowOutside" <> - OP.help "Allow calibrations to run outside the range of the calibration curve" + OP.help "Allow calibrations to run outside the range of the calibration curve." ) optParseDontInterpolateCalCurve :: OP.Parser (Bool) optParseDontInterpolateCalCurve = OP.switch ( OP.long "noInterpolation" <> - OP.help "Don't interpolate the calibration curve" + OP.help "Do not interpolate the calibration curve." ) optParseQuiet :: OP.Parser (Bool) optParseQuiet = OP.switch ( OP.long "quiet" <> OP.short 'q' <> - OP.help "Suppress the printing of calibration results to the command line" + OP.help "Suppress the printing of calibration results to the command line." ) optParseDensityFile :: OP.Parser (Maybe FilePath) optParseDensityFile = OP.option (Just <$> OP.str) ( OP.long "densityFile" <> - OP.help "Path to an output file which stores output densities per sample and calender year" <> + OP.metavar "FILE" <> + OP.help "Path to an output file to store output densities per CalEXPR and calender \ + \year." <> OP.value Nothing ) optParseHDRFile :: OP.Parser (Maybe FilePath) optParseHDRFile = OP.option (Just <$> OP.str) ( OP.long "hdrFile" <> - OP.help "Path to an output file which stores the high probability density regions for each \ - \sample" <> + OP.metavar "FILE" <> + OP.help "Path to an output file to store the high probability density regions for each \ + \CalEXPR." <> OP.value Nothing ) +optParseAgeSamplingSettings :: OP.Parser (Maybe (Maybe Word, Word, FilePath)) +optParseAgeSamplingSettings = + OP.optional $ (,,) <$> + optParseAgeSamplingConfSeed + <*> optParseAgeSamplingConfNrOfSamples + <*> optParseAgeSamplingFile + +optParseAgeSamplingConfSeed :: OP.Parser (Maybe Word) +optParseAgeSamplingConfSeed = OP.option (Just <$> OP.auto) ( + OP.long "seed" + <> OP.metavar "INT" + <> OP.help "Seed for the random number generator for age sampling. \ + \The default causes currycarbon to fall back to a random seed." + <> OP.value Nothing + <> OP.showDefault + ) + +optParseAgeSamplingConfNrOfSamples :: OP.Parser Word +optParseAgeSamplingConfNrOfSamples = OP.option OP.auto ( + OP.short 'n' + <> OP.long "nrSamples" + <> OP.metavar "INT" + <> OP.help "Number of age samples to draw per CalEXPR." + ) + +optParseAgeSamplingFile :: OP.Parser FilePath +optParseAgeSamplingFile = OP.strOption ( + OP.long "samplesFile" <> + OP.metavar "FILE" <> + OP.help "Path to an output file to store age samples for each CalEXPR." + ) + optParseCalCurveSegmentFile :: OP.Parser (Maybe FilePath) optParseCalCurveSegmentFile = OP.option (Just <$> OP.str) ( - OP.long "calCurveSegmentFile" <> - OP.help "Path to an output file which stores the relevant, interpolated calibration curve \ - \segment for the first (!) input date in a long format. \ - \This option as well as --calCurveMatrixFile are mostly meant for debugging" <> + OP.long "calCurveSegFile" <> + OP.metavar "FILE" <> + OP.help "Path to an output file to store the relevant, interpolated calibration curve \ + \segment for the first (!) input date. \ + \This option as well as --calCurveMatFile are meant for debugging." <> OP.value Nothing ) optParseCalCurveMatrixFile :: OP.Parser (Maybe FilePath) optParseCalCurveMatrixFile = OP.option (Just <$> OP.str) ( - OP.long "calCurveMatrixFile" <> + OP.long "calCurveMatFile" <> + OP.metavar "FILE" <> OP.help "Path to an output file which stores the relevant, interpolated calibration curve \ - \segment for the first (!) input date in a wide matrix format" <> + \segment for the first (!) input date in a wide matrix format." <> OP.value Nothing ) diff --git a/src/Currycarbon.hs b/src/Currycarbon.hs index d7cd0cd..6d86a2f 100644 --- a/src/Currycarbon.hs +++ b/src/Currycarbon.hs @@ -45,7 +45,13 @@ module Currycarbon ( CalExpr (..), addPDFs, multiplyPDFs, - normalizeCalPDF + normalizeCalPDF, + + -- * Drawing random samples from CalPDFs + -- $randsamp + AgeSamplingConf (..), + sampleAgesFromCalPDF, + RandomAgeSample (..) ) where import Currycarbon.CalCurves.Intcal20 @@ -128,3 +134,15 @@ which allow to combine two 'CalPDF's with the respective operation. Depending on the application, 'normalizeCalPDF' will come in handy here, to normalize the output density distributions. -} + +{- $randsamp + +Another common requirement for archaeological data analysis is temporal resampling, +where random age samples are drawn from 'CalPDF's according to the probability +density distribution. + +currycarbon supports this with 'sampleAgesFromCalPDF', which takes a configuration +data type 'AgeSamplingConf' including a random number generator and the number of +requested age samples, and an arbitrary 'CalPDF'. It returns an object of type +'RandomAgeSample' with a vector of sampled 'YearBCAD's. +-} diff --git a/src/Currycarbon/CLI/RunCalibrate.hs b/src/Currycarbon/CLI/RunCalibrate.hs index 1eb264f..c1dddfb 100644 --- a/src/Currycarbon/CLI/RunCalibrate.hs +++ b/src/Currycarbon/CLI/RunCalibrate.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE BangPatterns #-} - module Currycarbon.CLI.RunCalibrate (CalibrateOptions (..), runCalibrate) where @@ -10,14 +8,16 @@ import Currycarbon.SumCalibration import Currycarbon.Types import Currycarbon.Utils +import Control.Exception (throwIO) import Control.Monad (unless, when) import Data.Maybe (fromJust, fromMaybe, isJust) -import System.IO (hPutStrLn, stderr, stdout) +import System.IO (hPutStrLn, stderr) +import qualified System.Random as R -- | A data type to represent the options to the CLI module function runCalibrate data CalibrateOptions = CalibrateOptions { - _calibrateExprs :: [CalExpr] -- ^ String listing the uncalibrated dates that should be calibrated + _calibrateExprs :: [NamedCalExpr] -- ^ String listing the uncalibrated dates that should be calibrated , _calibrateExprFiles :: [FilePath] -- ^ List of files with uncalibrated dates to be calibrated , _calibrateCalCurveFile :: Maybe FilePath -- ^ Path to a .14c file , _calibrateCalibrationMethod :: CalibrationMethod -- ^ Calibration algorithm that should be used @@ -27,17 +27,27 @@ data CalibrateOptions = CalibrateOptions { , _calibrateStdOutEncoding :: String -- ^ Encoding of the stdout stream (show TextEncoding) , _calibrateDensityFile :: Maybe FilePath -- ^ Path to an output file (see CLI documentation) , _calibrateHDRFile :: Maybe FilePath -- ^ Path to an output file + , _calibrateAgeSampling :: Maybe (Maybe Word, Word, FilePath) -- ^ Settings for the age sampling , _calibrateCalCurveSegmentFile :: Maybe FilePath -- ^ Path to an output file , _calibrateCalCurveMatrixFile :: Maybe FilePath -- ^ Path to an output file } -- | Interface function to trigger calibration from the command line runCalibrate :: CalibrateOptions -> IO () -runCalibrate (CalibrateOptions exprs exprFiles calCurveFile method allowOutside noInterpolate quiet encoding densityFile hdrFile calCurveSegmentFile calCurveMatrixFile) = do +runCalibrate ( + CalibrateOptions + exprs exprFiles + calCurveFile method allowOutside noInterpolate + quiet encoding + densityFile hdrFile + ageSampling + calCurveSegmentFile calCurveMatrixFile + ) = do let ascii = encoding /= "UTF-8" -- compile dates - exprsFromFile <- mapM readCalExprFromFile exprFiles - let exprsRenamed = replaceEmptyNames $ exprs ++ concat exprsFromFile + exprsFromFile <- mapM readNamedCalExprsFromFile exprFiles + let exprsCombined = exprs ++ concat exprsFromFile + exprsRenamed = replaceEmptyNames exprsCombined if null exprsRenamed then hPutStrLn stderr "Nothing to calibrate. See currycarbon -h for help" else do @@ -50,68 +60,144 @@ runCalibrate (CalibrateOptions exprs exprFiles calCurveFile method allowOutside , _calConfAllowOutside = allowOutside , _calConfInterpolateCalCurve = not noInterpolate } + -- handle the special debug cases + when (isJust calCurveSegmentFile || isJust calCurveMatrixFile) $ do + case exprsRenamed of + [NamedCalExpr _ (UnCalDate uncal)] -> do + let calCurveSegment = prepareCalCurveSegment (not noInterpolate) $ + getRelevantCalCurveSegment uncal calCurve + when (isJust calCurveSegmentFile) $ + writeCalCurve (fromJust calCurveSegmentFile) calCurveSegment + when (isJust calCurveMatrixFile) $ + writeCalCurveMatrix (fromJust calCurveMatrixFile) $ + makeCalCurveMatrix (uncalToPDF uncal) calCurveSegment + _ -> do + throwIO $ CurrycarbonCLIException + "--calCurveSegFile and --calCurveMatFile only work with \ + \a single uncalibrated radiocarbon date." -- run calibration hPutStrLn stderr "Calibrating..." - let errorOrCalPDFs = map (evalCalExpr calConf calCurve) exprsRenamed - handleDates ascii True calCurve $ zip exprsRenamed errorOrCalPDFs + let errorOrCalPDFs = map (evalNamedCalExpr calConf calCurve) exprsRenamed + -- prepare random number generator for age sampling + maybeRNG <- case ageSampling of + Nothing -> pure Nothing + Just (maybeSeed, _, _) -> case maybeSeed of + Nothing -> Just <$> R.initStdGen + Just seed -> return $ Just $ R.mkStdGen (fromIntegral seed) + -- prepare and write the output per expression + handleExprs ascii True calCurve maybeRNG $ zip exprsRenamed errorOrCalPDFs where - -- the bool manages if a date is the first, calibratable date - handleDates :: Bool -> Bool -> CalCurveBP -> [(CalExpr, Either CurrycarbonException CalPDF)] -> IO () - handleDates _ _ _ [] = hPutStrLn stderr "Done." - handleDates _ascii True calCurve (firstDate:otherDates) = case firstDate of - (_, Left e) -> printE e >> handleDates _ascii True calCurve otherDates - (calExpr, Right cPDF) -> firstOut _ascii calCurve calExpr cPDF >> handleDates _ascii False calCurve otherDates - handleDates _ascii False calCurve (firstDate:otherDates) = case firstDate of - (_, Left e) -> printE e >> handleDates _ascii False calCurve otherDates - (calExpr, Right cPDF) -> otherOut _ascii calExpr cPDF >> handleDates _ascii False calCurve otherDates - firstOut :: Bool -> CalCurveBP -> CalExpr -> CalPDF -> IO () - firstOut _ascii calCurve calExpr@(UnCalDate uncal) calPDF = do - flexOut _ascii calExpr calPDF writeCalPDF writeCalC14 - when (isJust calCurveSegmentFile || isJust calCurveMatrixFile) $ do - hPutStrLn stderr $ - "Warning: The calCurveSegment file and the calCurveMatrix file only consider the first date, " ++ - renderUncalC14 uncal - let calCurveSegment = prepareCalCurveSegment (not noInterpolate) $ getRelevantCalCurveSegment uncal calCurve - when (isJust calCurveSegmentFile) $ - writeCalCurve (fromJust calCurveSegmentFile) calCurveSegment - when (isJust calCurveMatrixFile) $ - writeCalCurveMatrix (fromJust calCurveMatrixFile) $ - makeCalCurveMatrix (uncalToPDF uncal) calCurveSegment - firstOut _ascii _ calExpr calPDF = do - flexOut _ascii calExpr calPDF writeCalPDF writeCalC14 - when (isJust calCurveSegmentFile || isJust calCurveMatrixFile) $ do - hPutStrLn stderr $ "Warning: The calCurveSegment file and the calCurveMatrix file can only be produced for simple dates" - otherOut :: Bool -> CalExpr -> CalPDF -> IO () - otherOut _ascii calExpr calPDF = - flexOut _ascii calExpr calPDF appendCalPDF appendCalC14 - flexOut :: Bool -> CalExpr -> CalPDF -> (FilePath -> CalPDF -> IO ()) -> (FilePath -> CalC14 -> IO ()) -> IO () - flexOut _ascii calExpr calPDF calPDFToFile calC14ToFile = do + + -- loop over first and subsequent expressions + handleExprs :: + Bool -- encoding + -> Bool -- is this expression the first in the list of expressions? + -> CalCurveBP + -> Maybe R.StdGen -- rng for the age sampling seeds + -> [(NamedCalExpr, Either CurrycarbonException CalPDF)] + -> IO () + handleExprs _ _ _ _ [] = hPutStrLn stderr "Done." + -- first expression + handleExprs _ascii True calCurve maybeRNG (firstDate:otherDates) = + case firstDate of + (_, Left e) -> do + printE e + handleExprs _ascii True calCurve maybeRNG otherDates + (namedCalExpr, Right cPDF) -> do + let (sampleSeed, newRNG) = drawSeed maybeRNG + flexOut _ascii namedCalExpr cPDF sampleSeed writeCalPDF writeCalC14 writeRandomAgeSample + handleExprs _ascii False calCurve newRNG otherDates + -- subsequent expression + handleExprs _ascii False calCurve maybeRNG (nextDate:otherDates) = + case nextDate of + (_, Left e) -> do + printE e + handleExprs _ascii False calCurve maybeRNG otherDates + (namedCalExpr, Right cPDF) -> do + let (sampleSeed, newRNG) = drawSeed maybeRNG + flexOut _ascii namedCalExpr cPDF sampleSeed appendCalPDF appendCalC14 appendRandomAgeSample + handleExprs _ascii False calCurve newRNG otherDates + + printE :: CurrycarbonException -> IO () + printE e = hPutStrLn stderr $ renderCurrycarbonException e + + drawSeed :: Maybe R.StdGen -> (Maybe Int, Maybe R.StdGen) + drawSeed maybeRNG = (\x -> (fromIntegral . fst <$> x, snd <$> x)) (R.genWord32 <$> maybeRNG) + + -- flexible expression handler + flexOut :: + Bool + -> NamedCalExpr + -> CalPDF + -> Maybe Int + -> (FilePath -> CalPDF -> IO ()) + -> (FilePath -> CalC14 -> IO ()) + -> (FilePath -> RandomAgeSample -> IO ()) + -> IO () + flexOut _ascii namedCalExpr calPDF maybeSeed calPDFToFile calC14ToFile randomAgeSampleToFile = do + -- try to refine calPDF case refineCalDate calPDF of + -- refining did not work Nothing -> do unless quiet $ do - hPutStrLn stdout $ renderCalExpr calExpr - hPutStrLn stderr "Warning: Could not calculate meaningful HDRs for this expression. Check --densityFile." - when (isJust hdrFile) $ unless quiet $ hPutStrLn stderr "Nothing written to the HDR file" - when (isJust densityFile) $ calPDFToFile (fromJust densityFile) calPDF + putStrLn (renderNamedCalExpr namedCalExpr) + hPutStrLn stderr "Warning: Could not calculate meaningful HDRs for this expression. \ + \Check --densityFile." + when (isJust hdrFile) $ + unless quiet $ hPutStrLn stderr "Nothing written to the HDR file" + when (isJust densityFile) $ + calPDFToFile (fromJust densityFile) calPDF + when (isJust ageSampling && isJust maybeSeed) $ + runAgeSampling (fromJust ageSampling) (fromJust maybeSeed) calPDF randomAgeSampleToFile + -- refining did work Just calC14 -> do - unless quiet $ hPutStrLn stdout $ renderCalDatePretty _ascii (calExpr, calPDF, calC14) - when (isJust hdrFile) $ calC14ToFile (fromJust hdrFile) calC14 - when (isJust densityFile) $ calPDFToFile (fromJust densityFile) calPDF - printE :: CurrycarbonException -> IO () - printE e = hPutStrLn stderr $ renderCurrycarbonException e + unless quiet $ do + putStrLn (renderCalDatePretty _ascii (namedCalExpr, calPDF, calC14)) + when (isJust hdrFile) $ + calC14ToFile (fromJust hdrFile) calC14 + when (isJust densityFile) $ + calPDFToFile (fromJust densityFile) calPDF + when (isJust ageSampling && isJust maybeSeed) $ + runAgeSampling (fromJust ageSampling) (fromJust maybeSeed) calPDF randomAgeSampleToFile + + runAgeSampling :: + (Maybe Word, Word, FilePath) + -> Int + -> CalPDF + -> (FilePath -> RandomAgeSample -> IO ()) + -> IO () + runAgeSampling (_, nrOfSamples, path) seed calPDF randomAgeSampleToFile = do + let rng = R.mkStdGen seed + conf = AgeSamplingConf rng nrOfSamples + samplingResult = sampleAgesFromCalPDF conf calPDF + randomAgeSampleToFile path samplingResult + -- | Helper function to replace empty input names with a sequence of numbers, -- to get each input date an unique identifier -replaceEmptyNames :: [CalExpr] -> [CalExpr] -replaceEmptyNames = zipWith (replaceName . show) ([1..] :: [Integer]) +replaceEmptyNames :: [NamedCalExpr] -> [NamedCalExpr] +replaceEmptyNames = zipWith (modifyNamedExpr . show) ([1..] :: [Integer]) where + modifyNamedExpr :: String -> NamedCalExpr -> NamedCalExpr + modifyNamedExpr i nexpr = + if _exprID nexpr == "" + then nexpr { _exprID = i, _expr = replaceName i (_expr nexpr) } + else nexpr { _expr = replaceName i (_expr nexpr) } replaceName :: String -> CalExpr -> CalExpr replaceName i (UnCalDate (UncalC14 name x y)) = - if name == "unknownSampleName" + if name == "" then UnCalDate $ UncalC14 i x y else UnCalDate $ UncalC14 name x y + replaceName i (WindowBP (TimeWindowBP name start stop)) = + if name == "" + then WindowBP $ TimeWindowBP i start stop + else WindowBP $ TimeWindowBP name start stop + replaceName i (WindowBCAD (TimeWindowBCAD name start stop)) = + if name == "" + then WindowBCAD $ TimeWindowBCAD i start stop + else WindowBCAD $ TimeWindowBCAD name start stop replaceName i (CalDate (CalPDF name x y)) = - if name == "unknownSampleName" + if name == "" then CalDate $ CalPDF i x y else CalDate $ CalPDF name x y replaceName i (SumCal a b) = SumCal (replaceName (i ++ "s") a) (replaceName (i ++ "S") b) diff --git a/src/Currycarbon/Calibration/Calibration.hs b/src/Currycarbon/Calibration/Calibration.hs index 509360f..67cba98 100644 --- a/src/Currycarbon/Calibration/Calibration.hs +++ b/src/Currycarbon/Calibration/Calibration.hs @@ -16,6 +16,8 @@ module Currycarbon.Calibration.Calibration , refineCalDate , CalibrateDatesConf (..) , defaultCalConf + , AgeSamplingConf (..) + , sampleAgesFromCalPDF ) where import Currycarbon.Calibration.Bchron @@ -24,10 +26,12 @@ import Currycarbon.Calibration.Utils import Currycarbon.Types import Currycarbon.Utils +import qualified Control.Monad.Random as CMR import Data.List (elemIndex, groupBy, sort, sortBy) import Data.Maybe (fromJust) import qualified Data.Vector.Unboxed as VU +import qualified System.Random as R -- | A data type to cover the configuration options of the calibrateDates function data CalibrateDatesConf = CalibrateDatesConf { @@ -83,25 +87,44 @@ refineCalDates :: [CalPDF] -> [Maybe CalC14] refineCalDates = map refineCalDate refineCalDate :: CalPDF -> Maybe CalC14 -refineCalDate (CalPDF name cals dens) = - if VU.sum dens == 0 || VU.length (VU.filter (>= 1.0) dens) == 1 -- don't calculate CalC14, if it's not meaningful - then Nothing - else Just $ CalC14 { +refineCalDate (CalPDF name cals dens) + -- don't calculate CalC14, if it's not meaningful + | VU.sum dens == 0 || VU.length (VU.filter (>= 1.0) dens) == 1 = Nothing + -- for simple uniform age ranges + | VU.length (VU.uniq dens) == 1 = + let start = VU.head cals + stop = VU.last cals + in Just $ CalC14 { + _calC14id = name + , _calC14RangeSummary = CalRangeSummary { + _calRangeStartTwoSigma = start + , _calRangeStartOneSigma = start + , _calRangeMedian = median + , _calRangeStopOneSigma = stop + , _calRangeStopTwoSigma = stop + } + , _calC14HDROneSigma = [HDR start stop] + , _calC14HDRTwoSigma = [HDR start stop] + } + -- for normal post-calibration probability distributions + | otherwise = + Just $ CalC14 { _calC14id = name , _calC14RangeSummary = CalRangeSummary { _calRangeStartTwoSigma = _hdrstart $ head hdrs95 , _calRangeStartOneSigma = _hdrstart $ head hdrs68 - , _calRangeMedian = fromJust $ cals `indexVU` elemIndex (minimum distanceTo05) distanceTo05 + , _calRangeMedian = median , _calRangeStopOneSigma = _hdrstop $ last hdrs68 , _calRangeStopTwoSigma = _hdrstop $ last hdrs95 } , _calC14HDROneSigma = hdrs68 , _calC14HDRTwoSigma = hdrs95 - } + } where -- simple density cumsum for median age cumsumDensities = cumsumDens (VU.toList $ VU.zip cals dens) distanceTo05 = map (\x -> abs $ (x - 0.5)) cumsumDensities + median = fromJust $ cals `indexVU` elemIndex (minimum distanceTo05) distanceTo05 -- sorted density cumsum for hdrs sortedDensities = sortBy (flip (\ (_, dens1) (_, dens2) -> compare dens1 dens2)) (VU.toList $ VU.zip cals dens) cumsumSortedDensities = cumsumDens sortedDensities @@ -131,3 +154,25 @@ refineCalDate (CalPDF name cals dens) = getIn95 (_,_,_,x) = x getYear :: (Int, Float, Bool, Bool) -> Int getYear (year,_,_,_) = year + +-- age sampling + +-- | A data type to define the settings for age sampling +data AgeSamplingConf = AgeSamplingConf { + -- | Random number generator + _assRNG :: R.StdGen + -- | Number of samples that should be drawn per sample + , _assNumberOfSamples :: Word + } deriving (Show, Eq) + +-- | Draw random samples from a probability density table +sampleAgesFromCalPDF :: AgeSamplingConf -> CalPDF -> RandomAgeSample +sampleAgesFromCalPDF (AgeSamplingConf rng n) (CalPDF calPDFid cals dens) = + let weightedList = zip (VU.toList cals) (map toRational $ VU.toList dens) + infSamplesList = sampleWeightedList rng weightedList + samples = take (fromIntegral n) infSamplesList + in RandomAgeSample calPDFid (VU.fromList samples) + where + sampleWeightedList :: CMR.RandomGen g => g -> [(a, Rational)] -> [a] + sampleWeightedList gen weights = CMR.evalRand m gen + where m = sequence . repeat . CMR.fromList $ weights diff --git a/src/Currycarbon/Calibration/Utils.hs b/src/Currycarbon/Calibration/Utils.hs index 1ef5d1f..19a5a2e 100644 --- a/src/Currycarbon/Calibration/Utils.hs +++ b/src/Currycarbon/Calibration/Utils.hs @@ -63,7 +63,10 @@ makeBCADCalCurve :: CalCurveBP -> CalCurveBCAD makeBCADCalCurve (CalCurveBP cals uncals sigmas) = CalCurveBCAD (vectorBPToBCAD cals) (vectorBPToBCAD uncals) sigmas vectorBPToBCAD :: VU.Vector YearBP -> VU.Vector YearBCAD -vectorBPToBCAD = VU.map (\x -> -(fromIntegral x) + 1950) +vectorBPToBCAD = VU.map bp2BCAD + +bp2BCAD :: YearBP -> YearBCAD +bp2BCAD x = -(fromIntegral x) + 1950 interpolateCalCurve :: CalCurveBP -> CalCurveBP interpolateCalCurve (CalCurveBP cals uncals sigmas) = diff --git a/src/Currycarbon/ParserHelpers.hs b/src/Currycarbon/ParserHelpers.hs new file mode 100644 index 0000000..bf83a89 --- /dev/null +++ b/src/Currycarbon/ParserHelpers.hs @@ -0,0 +1,168 @@ +module Currycarbon.ParserHelpers where + +import qualified Text.Parsec as P +import qualified Text.Parsec.Error as P +import qualified Text.Parsec.String as P + +-- * High level building blocks + +parseRecordType :: String -> P.Parser a -> P.Parser a +parseRecordType typeName parser = do + _ <- P.string typeName + parseInParens parser + +parseNamedVector :: P.Parser a -> P.Parser b -> P.Parser [(a,b)] +parseNamedVector parseKey parseValue = + parseVector $ parseKeyValuePair parseKey parseValue + +parseVector :: P.Parser a -> P.Parser [a] +parseVector parser = do + _ <- P.char 'c' + parseInParens (P.sepBy parser consumeCommaSep) + +parseArgumentWithDefault :: String -> P.Parser b -> b -> P.Parser b +parseArgumentWithDefault argumentName parseValue defaultValue = + P.option defaultValue (parseArgument argumentName parseValue) + +parseArgumentOptional :: String -> P.Parser b -> P.Parser (Maybe b) +parseArgumentOptional argumentName parseValue = + P.optionMaybe $ P.try (parseArgument argumentName parseValue) + +parseArgument :: String -> P.Parser b -> P.Parser b +parseArgument argumentName parseValue = do + res <- parseArgumentWithoutComma argumentName parseValue + P.optional consumeCommaSep + return res + +parseNamedArgumentOptional :: String -> P.Parser b -> P.Parser (Maybe b) +parseNamedArgumentOptional argumentName parseValue = + P.optionMaybe $ P.try (parseNamedArgument argumentName parseValue) + +-- * Low level blocks + +parseArgumentWithoutComma :: String -> P.Parser b -> P.Parser b +parseArgumentWithoutComma argumentName parseValue = + P.try (parseNamedArgument argumentName parseValue) P.<|> parseUnnamedArgument parseValue + +parseNamedArgument :: String -> P.Parser b -> P.Parser b +parseNamedArgument argumentName parseValue = do + (_,b) <- parseKeyValuePair (P.string argumentName) parseValue + return b + +parseUnnamedArgument :: P.Parser b -> P.Parser b +parseUnnamedArgument parseValue = parseValue + +parseKeyValuePair :: P.Parser a -> P.Parser b -> P.Parser (a,b) +parseKeyValuePair parseKey parseValue = do + key <- parseKey + consumeEqualSep + value <- parseValue + return (key, value) + +parseInParens :: P.Parser b -> P.Parser b +parseInParens parser = do + _ <- P.char '(' + _ <- P.spaces + res <- parser + _ <- P.spaces + _ <- P.char ')' + return res + +consumeEqualSep :: P.Parser () +consumeEqualSep = do + _ <- P.spaces *> P.char '=' <* P.spaces + return () +consumeCommaSep :: P.Parser () +consumeCommaSep = do + _ <- P.spaces *> P.char ',' <* P.spaces + return () + +parseCharInSpace :: Char -> P.Parser Char +parseCharInSpace c = P.between P.spaces P.spaces (P.char c) + +parseAnyString :: P.Parser String +parseAnyString = + P.try inDoubleQuotes P.<|> P.try inSingleQuotes P.<|> inNoQuotes + where + inDoubleQuotes = P.between (P.char '"') (P.char '"') (P.many P.anyChar) + inSingleQuotes = P.between (P.char '\'') (P.char '\'') (P.many P.anyChar) + inNoQuotes = P.many (P.noneOf ",):") + +-- * Sequence parsers + +parseDoubleSequence :: P.Parser [Double] +parseDoubleSequence = do + start <- parseDouble + _ <- P.oneOf ":" + stop <- parseDouble + _ <- P.oneOf ":" + by <- parsePositiveFloatNumber + return [start,(start+by)..stop] + +-- * Number parsers + +parseDouble :: P.Parser Double +parseDouble = do + P.try parseNegativeFloatNumber P.<|> parsePositiveFloatNumber + +parseNegativeFloatNumber :: P.Parser Double +parseNegativeFloatNumber = do + _ <- P.oneOf "-" + i <- parsePositiveFloatNumber + return (-i) + +parseFraction :: P.Parser Double +parseFraction = do + num <- parsePositiveFloatNumber + if num > 1 + then fail "must be between zero and one" + else return num + +parsePositiveFloatNumber :: P.Parser Double +parsePositiveFloatNumber = do + num <- parseNumber + optionalMore <- P.option "" $ (:) <$> P.char '.' <*> parseNumber + return $ read $ num ++ optionalMore + +parseIntegerSequence :: P.Parser [Int] +parseIntegerSequence = do + start <- parseInt + _ <- P.oneOf ":" + stop <- parseInt + _ <- P.oneOf ":" + by <- fromIntegral <$> parsePositiveInt + return [start,(start+by)..stop] + +parseInt :: P.Parser Int +parseInt = do + P.try parseNegativeInt P.<|> parsePositiveInt + +parseNegativeInt :: P.Parser Int +parseNegativeInt = do + _ <- P.oneOf "-" + i <- parsePositiveInt + return (-i) + +parsePositiveInt :: P.Parser Int +parsePositiveInt = fromIntegral <$> parseWord + +-- https://hackage.haskell.org/package/base-4.19.0.0/docs/Data-Word.html +parseWord :: P.Parser Word +parseWord = do + read <$> parseNumber + +parsePositiveDouble :: P.Parser Double +parsePositiveDouble = do + read <$> parseNumber + +parseNumber :: P.Parser [Char] +parseNumber = P.many1 P.digit + +-- * Error helpers + +showParsecErr :: P.ParseError -> String +showParsecErr err = + P.showErrorMessages + "or" "unknown parse error" + "expecting" "unexpected" "end of input" + (P.errorMessages err) diff --git a/src/Currycarbon/Parsers.hs b/src/Currycarbon/Parsers.hs index f13bc02..7ae7eb9 100644 --- a/src/Currycarbon/Parsers.hs +++ b/src/Currycarbon/Parsers.hs @@ -2,15 +2,16 @@ module Currycarbon.Parsers where +import Currycarbon.ParserHelpers import Currycarbon.Types import Currycarbon.Utils -import Control.Exception (throwIO) -import Data.List (intercalate, transpose) -import qualified Data.Vector as V -import qualified Data.Vector.Unboxed as VU -import qualified Text.Parsec as P -import qualified Text.Parsec.String as P +import Control.Exception (throwIO) +import Data.List (intercalate, transpose) +import qualified Data.Vector as V +import qualified Data.Vector.Unboxed as VU +import qualified Text.Parsec as P +import qualified Text.Parsec.String as P -- * Parsing, rendering and writing functions -- @@ -19,12 +20,13 @@ import qualified Text.Parsec.String as P -- This module contains a number of functions to manage data input and -- output plumbing for different datatypes --- CalibrationMethod +-- read the calibration method + readCalibrationMethod :: String -> Either String CalibrationMethod readCalibrationMethod s = case P.runParser parseCalibrationMethod () "" s of - Left err -> Left $ renderCurrycarbonException $ CurrycarbonCLIParsingException $ show err - Right x -> Right x + Left err -> Left $ showParsecErr err + Right x -> Right x parseCalibrationMethod :: P.Parser CalibrationMethod parseCalibrationMethod = do @@ -35,7 +37,7 @@ parseCalibrationMethod = do P.try studentT P.<|> normal studentT = do _ <- P.string "StudentT," - dof <- read <$> P.many1 P.digit + dof <- parsePositiveDouble return (Bchron $ StudentTDist dof) normal = do _ <- P.string "Normal" @@ -44,11 +46,13 @@ parseCalibrationMethod = do _ <- P.string "MatrixMult" return MatrixMultiplication +-- pretty printing + -- | Combine 'CalExpr', 'CalPDF' and 'CalC14' to render pretty command line output -- like this: -- -- @ --- DATE: (5000±30BP + 5100±100BP) +-- CalEXPR: [Ex2] (S1:5000±30BP + S2:5100±100BP) -- Calibrated: 4150BC \>\> 3941BC \> 3814BC \< 3660BC \<\< 3651BC -- 1-sigma: 3941-3864BC, 3810-3707BC, 3667-3660BC -- 2-sigma: 4150-4148BC, 4048-3651BC @@ -67,90 +71,230 @@ parseCalibrationMethod = do -- renderCalDatePretty :: Bool -- ^ Should the CLI plot be restricted to (boring) ASCII symbols? - -> (CalExpr, CalPDF, CalC14) + -> (NamedCalExpr, CalPDF, CalC14) -> String renderCalDatePretty ascii (calExpr, calPDF, calC14) = - "DATE: " ++ intercalate "\n" [ - renderCalExpr calExpr + "CalEXPR: " ++ intercalate "\n" [ + renderNamedCalExpr calExpr , renderCalC14 calC14 , renderCLIPlotCalPDF ascii 6 50 calPDF calC14 ] +-- write and read calibration expressions + +renderNamedCalExpr :: NamedCalExpr -> String +renderNamedCalExpr (NamedCalExpr exprID calExpr) = renderExprID exprID ++ " " ++ renderCalExpr calExpr + +renderExprID :: String -> String +renderExprID s = "[" ++ s ++ "]" + renderCalExpr :: CalExpr -> String renderCalExpr (UnCalDate a) = renderUncalC14 a +renderCalExpr (WindowBP a) = renderTimeWindowBP a +renderCalExpr (WindowBCAD a) = renderTimeWindowBCAD a renderCalExpr (CalDate (CalPDF name _ _)) = name renderCalExpr (SumCal a b) = "(" ++ renderCalExpr a ++ " + " ++ renderCalExpr b ++ ")" renderCalExpr (ProductCal a b) = "(" ++ renderCalExpr a ++ " * " ++ renderCalExpr b ++ ")" +renderTimeWindowBP :: TimeWindowBP -> String +renderTimeWindowBP (TimeWindowBP name start stop) = + name ++ ":" ++ renderYearBP start ++ "-" ++ renderYearBP stop + +renderTimeWindowBCAD :: TimeWindowBCAD -> String +renderTimeWindowBCAD (TimeWindowBCAD name start stop) = + name ++ ":" ++ renderYearBCAD start ++ "-" ++ renderYearBCAD stop + +parseTimeWindowBP :: P.Parser TimeWindowBP +parseTimeWindowBP = parseRecordType "rangeBP" $ P.try long P.<|> short + where + long = do + name <- parseArgument "id" parseAnyString + start <- parseArgument "start" parseWord + stop <- parseArgument "stop" parseWord + construct name start stop + short = do + start <- parseArgument "start" parseWord + stop <- parseArgument "stop" parseWord + construct "" start stop + construct name start stop = do + if start >= stop + then return (TimeWindowBP name start stop) + else fail "the BP stop date can not be larger than the start date" + +parseTimeWindowBCAD :: P.Parser TimeWindowBCAD +parseTimeWindowBCAD = parseRecordType "rangeBCAD" $ P.try long P.<|> short + where + long = do + name <- parseArgument "id" parseAnyString + start <- parseArgument "start" parseInt + stop <- parseArgument "stop" parseInt + construct name start stop + short = do + start <- parseArgument "start" parseInt + stop <- parseArgument "stop" parseInt + construct "" start stop + construct name start stop = do + if start <= stop + then return (TimeWindowBCAD name start stop) + else fail "the BC/AD stop date can not be smaller than the start date" + -- https://gist.github.com/abhin4v/017a36477204a1d57745 -spaceChar :: Char -> P.Parser Char -spaceChar c = P.between P.spaces P.spaces (P.char c) ---spaceChar = P.char +addFun :: P.Parser CalExpr +addFun = parseRecordType "sum" $ do + a <- parseArgument "a" term + b <- parseArgument "b" expr + return $ SumCal a b -add :: P.Parser CalExpr -add = SumCal <$> term <*> (spaceChar '+' *> expr) +addOperator :: P.Parser CalExpr +addOperator = SumCal <$> term <*> (parseCharInSpace '+' *> expr) -mul :: P.Parser CalExpr -mul = ProductCal <$> factor <*> (spaceChar '*' *> term) +mulFun :: P.Parser CalExpr +mulFun = parseRecordType "product" $ do + a <- parseArgument "a" factor + b <- parseArgument "b" term + return $ ProductCal a b + +mulOperator :: P.Parser CalExpr +mulOperator = ProductCal <$> factor <*> (parseCharInSpace '*' *> term) parens :: P.Parser CalExpr -parens = P.between (spaceChar '(') (spaceChar ')') expr +parens = P.between (parseCharInSpace '(') (parseCharInSpace ')') expr factor :: P.Parser CalExpr -factor = parens P.<|> (UnCalDate <$> parseUncalC14) +factor = P.try parens + P.<|> P.try addFun + P.<|> P.try mulFun + P.<|> P.try (WindowBP <$> parseTimeWindowBP) + P.<|> P.try (WindowBCAD <$> parseTimeWindowBCAD) + P.<|> (UnCalDate <$> parseUncalC14) term :: P.Parser CalExpr -term = P.try mul P.<|> factor +term = P.try mulOperator P.<|> factor expr :: P.Parser CalExpr -expr = P.try add P.<|> term -- <* P.eof +expr = P.try addOperator P.<|> term -- <* P.eof -readCalExpr :: String -> Either String [CalExpr] -readCalExpr s = +namedExpr :: P.Parser NamedCalExpr +namedExpr = P.try nameBeforeColon P.<|> P.try record P.<|> onlyExpr + where + nameBeforeColon = do + name <- parseAnyString + _ <- P.char ':' + _ <- P.spaces + ex <- expr + return (NamedCalExpr name ex) + record = parseRecordType "calExpr" $ P.try long P.<|> short + long = do + name <- parseArgument "id" parseAnyString + ex <- parseArgument "expr" expr + return (NamedCalExpr name ex) + short = do + ex <- parseArgument "expr" expr + return (NamedCalExpr "" ex) + onlyExpr = NamedCalExpr "" <$> expr + +readNamedCalExprs :: String -> Either String [NamedCalExpr] +readNamedCalExprs s = case P.runParser parseCalExprSepBySemicolon () "" s of - Left err -> Left $ renderCurrycarbonException $ CurrycarbonCLIParsingException $ show err - Right x -> Right x + Left err -> Left $ showParsecErr err + Right x -> Right x where - parseCalExprSepBySemicolon :: P.Parser [CalExpr] - parseCalExprSepBySemicolon = P.sepBy expr (P.char ';' <* P.spaces) <* P.eof + parseCalExprSepBySemicolon :: P.Parser [NamedCalExpr] + parseCalExprSepBySemicolon = P.sepBy namedExpr (P.char ';' <* P.spaces) <* P.eof + +readOneNamedCalExpr :: String -> Either String NamedCalExpr +readOneNamedCalExpr s = + case P.runParser namedExpr () "" s of + Left err -> Left $ showParsecErr err + Right x -> Right x + +readNamedCalExprsFromFile :: FilePath -> IO [NamedCalExpr] +readNamedCalExprsFromFile uncalFile = do + ss <- lines <$> readFile uncalFile + mapM readOneLine ss + where + readOneLine :: String -> IO NamedCalExpr + readOneLine s = case readOneNamedCalExpr s of + Left err -> throwIO $ CurrycarbonCLIParsingException $ err ++ "\nin \"" ++ s ++ "\"" + Right x -> return x + +-- UncalC14 +renderUncalC14WithoutName :: UncalC14 -> String +renderUncalC14WithoutName (UncalC14 _ bp sigma) = show bp ++ "±" ++ show sigma ++ "BP" + +renderUncalC14 :: UncalC14 -> String +renderUncalC14 (UncalC14 name bp sigma) = name ++ ":" ++ show bp ++ "±" ++ show sigma ++ "BP" -readCalExprFromFile :: FilePath -> IO [CalExpr] -readCalExprFromFile uncalFile = do +-- | Read uncalibrated radiocarbon dates from a file. The file should feature one radiocarbon date +-- per line in the form "\,\,\", where +-- \ is optional. A valid file could look like this: +-- +-- @ +-- Sample1,5000,30 +-- 6000,50 +-- Sample3,4000,25 +-- @ +-- +readUncalC14FromFile :: FilePath -> IO [UncalC14] +readUncalC14FromFile uncalFile = do s <- readFile uncalFile - case P.runParser parseCalExprSepByNewline () "" s of - Left err -> throwIO $ CurrycarbonCLIParsingException $ show err + case P.runParser uncalC14SepByNewline () "" s of + Left err -> throwIO $ CurrycarbonCLIParsingException $ showParsecErr err Right x -> return x where - parseCalExprSepByNewline :: P.Parser [CalExpr] - parseCalExprSepByNewline = P.endBy expr (P.newline <* P.spaces) <* P.eof + uncalC14SepByNewline :: P.Parser [UncalC14] + uncalC14SepByNewline = P.endBy parseUncalC14 (P.newline <* P.spaces) <* P.eof + +readUncalC14 :: String -> Either String [UncalC14] +readUncalC14 s = + case P.runParser uncalC14SepBySemicolon () "" s of + Left err -> Left $ showParsecErr err + Right x -> Right x + where + uncalC14SepBySemicolon :: P.Parser [UncalC14] + uncalC14SepBySemicolon = P.sepBy parseUncalC14 (P.char ';' <* P.spaces) <* P.eof + +parseUncalC14 :: P.Parser UncalC14 +parseUncalC14 = P.try record P.<|> P.try long P.<|> short + where + record = parseRecordType "uncalC14" $ P.try long P.<|> short + long = do + name <- parseArgument "id" parseAnyString + age <- parseArgument "yearBP" parseWord + sigma <- parseArgument "sigma" parseWord + return (UncalC14 name age sigma) + short = do + age <- parseArgument "yearBP" parseWord + sigma <- parseArgument "sigma" parseWord + return (UncalC14 "" age sigma) -- CalC14 --- | Write 'CalC14's to the file system. The output file is a long .csv file with the following structure: +-- | Write 'CalC14's to the file system. The output file is a long .tsv file with the following structure: -- -- @ --- sample,hdrSigma,hdrStartBCAD,hdrStopBCAD --- Sample1,1,-3797,-3709 --- Sample1,1,-3894,-3880 --- Sample1,2,-3680,-3655 --- Sample1,2,-3810,-3700 --- Sample1,2,-3941,-3864 --- Sample2,1,-1142,-1130 --- Sample2,1,-1173,-1161 --- Sample2,1,-1293,-1194 --- Sample2,1,-1368,-1356 --- Sample2,2,-1061,-1059 --- Sample2,2,-1323,-1112 --- Sample2,2,-1393,-1334 +-- id hdrSigmaLevel hdrStartYearBCAD hdrStopYearBCAD +-- Sample1 1 -3797 -3709 +-- Sample1 1 -3894 -3880 +-- Sample1 2 -3680 -3655 +-- Sample1 2 -3810 -3700 +-- Sample1 2 -3941 -3864 +-- Sample2 1 -1142 -1130 +-- Sample2 1 -1173 -1161 +-- Sample2 1 -1293 -1194 +-- Sample2 1 -1368 -1356 +-- Sample2 2 -1061 -1059 +-- Sample2 2 -1323 -1112 +-- Sample2 2 -1393 -1334 -- @ -- writeCalC14s :: FilePath -> [CalC14] -> IO () writeCalC14s path calC14s = writeFile path $ - "sample,hdrSigma,hdrStartBCAD,hdrStopBCAD\n" + "id\thdrSigmaLevel\thdrStartYearBCAD\thdrStopYearBCAD\n" ++ intercalate "\n" (map renderCalC14ForFile calC14s) writeCalC14 :: FilePath -> CalC14 -> IO () writeCalC14 path calC14 = writeFile path $ - "sample,hdrSigma,hdrStartBCAD,hdrStopBCAD\n" + "id\thdrSigmaLevel\thdrStartYearBCAD\thdrStopYearBCAD\n" ++ renderCalC14ForFile calC14 appendCalC14 :: FilePath -> CalC14 -> IO () @@ -165,7 +309,7 @@ renderCalC14ForFile (CalC14 name _ hdrs68 hdrs95) = zip3 (repeat name) (repeat "2") (renderHDRsForFile hdrs95) where renderRow :: (String, String, (String, String)) -> String - renderRow (a, b, (c, d)) = intercalate "," [a,b,c,d] + renderRow (a, b, (c, d)) = intercalate "\t" [a,b,c,d] renderCalC14s :: [CalC14] -> String renderCalC14s xs = @@ -186,6 +330,11 @@ renderCalRangeSummary s = ++ renderYearBCAD (_calRangeStopOneSigma s) ++ " << " ++ renderYearBCAD (_calRangeStopTwoSigma s) +-- BP +renderYearBP :: YearBP -> String +renderYearBP x = + show x ++ "BP" -- ++ " (" ++ (renderYearBCAD $ bp2BCAD x) ++ ")" + -- BCAD renderYearBCAD :: YearBCAD -> String renderYearBCAD x @@ -217,40 +366,40 @@ writeCalCurveMatrix path calCurveMatrix = renderCalCurveMatrix :: CalCurveMatrix -> String renderCalCurveMatrix (CalCurveMatrix uncals cals curveDensities) = - let header = "," ++ intercalate "," (map show $ VU.toList cals) ++ "\n" + let header = "\t" ++ intercalate "\t" (map show $ VU.toList cals) ++ "\n" body = zipWith makeRow (VU.toList uncals) (transpose $ V.toList (V.map VU.toList curveDensities)) in header ++ intercalate "\n" body where - makeRow uncal dens = show uncal ++ "," ++ intercalate "," (map show dens) + makeRow uncal dens = show uncal ++ "\t" ++ intercalate "\t" (map show dens) -- CalPDF --- | Write 'CalPDF's to the file system. The output file is a long .csv file with the following structure: +-- | Write 'CalPDF's to the file system. The output file is a long .tsv file with the following structure: -- -- @ --- sample,calBCAD,density +-- id yearBCAD density -- ... --- Sample1,-1391,2.8917924e-4 --- Sample1,-1390,3.3285577e-4 --- Sample1,-1389,3.5674628e-4 --- Sample1,-1388,3.750703e-4 +-- Sample1 -1391 2.8917924e-4 +-- Sample1 -1390 3.3285577e-4 +-- Sample1 -1389 3.5674628e-4 +-- Sample1 -1388 3.750703e-4 -- ... --- Sample2,-3678,1.8128564e-3 --- Sample2,-3677,1.9512239e-3 --- Sample2,-3676,2.0227064e-3 --- Sample2,-3675,2.095691e-3 +-- Sample2 -3678 1.8128564e-3 +-- Sample2 -3677 1.9512239e-3 +-- Sample2 -3676 2.0227064e-3 +-- Sample2 -3675 2.095691e-3 -- ... -- @ -- writeCalPDFs :: FilePath -> [CalPDF] -> IO () writeCalPDFs path calPDFs = writeFile path $ - "sample,calBCAD,density\n" + "id\tyearBCAD\tdensity\n" ++ renderCalPDFs calPDFs writeCalPDF :: FilePath -> CalPDF -> IO () writeCalPDF path calPDF = writeFile path $ - "sample,calBCAD,density\n" + "id\tyearBCAD\tdensity\n" ++ renderCalPDF calPDF appendCalPDF :: FilePath -> CalPDF -> IO () @@ -264,8 +413,9 @@ renderCalPDF :: CalPDF -> String renderCalPDF (CalPDF name cals dens) = concatMap makeRow $ VU.toList $ VU.zip cals dens where - makeRow (x,y) = show name ++ "," ++ show x ++ "," ++ show y ++ "\n" + makeRow (x,y) = name ++ "\t" ++ show x ++ "\t" ++ show y ++ "\n" +-- cli plot data PlotSymbol = HistFill | HistTop | AxisEnd | AxisLine | AxisTick | HDRLine renderCLIPlotCalPDF :: Bool -> Int -> Int -> CalPDF -> CalC14 -> String @@ -356,59 +506,6 @@ renderCLIPlotCalPDF ascii rows cols (CalPDF _ cals dens) c14 = let ha = _hdrstart h; hb = _hdrstop h in (a >= ha && a <= hb) || (b >= ha && b <= hb) || (a <= ha && b >= hb) --- UncalC14 -renderUncalC14WithoutName :: UncalC14 -> String -renderUncalC14WithoutName (UncalC14 _ bp sigma) = show bp ++ "±" ++ show sigma ++ "BP" - -renderUncalC14 :: UncalC14 -> String -renderUncalC14 (UncalC14 name bp sigma) = name ++ ":" ++ show bp ++ "±" ++ show sigma ++ "BP" - --- | Read uncalibrated radiocarbon dates from a file. The file should feature one radiocarbon date --- per line in the form "\,\,\", where --- \ is optional. A valid file could look like this: --- --- @ --- Sample1,5000,30 --- 6000,50 --- Sample3,4000,25 --- @ --- -readUncalC14FromFile :: FilePath -> IO [UncalC14] -readUncalC14FromFile uncalFile = do - s <- readFile uncalFile - case P.runParser uncalC14SepByNewline () "" s of - Left err -> throwIO $ CurrycarbonCLIParsingException $ show err - Right x -> return x - where - uncalC14SepByNewline :: P.Parser [UncalC14] - uncalC14SepByNewline = P.endBy parseUncalC14 (P.newline <* P.spaces) <* P.eof - -readUncalC14 :: String -> Either String [UncalC14] -readUncalC14 s = - case P.runParser uncalC14SepBySemicolon () "" s of - Left err -> Left $ renderCurrycarbonException $ CurrycarbonCLIParsingException $ show err - Right x -> Right x - where - uncalC14SepBySemicolon :: P.Parser [UncalC14] - uncalC14SepBySemicolon = P.sepBy parseUncalC14 (P.char ';' <* P.spaces) <* P.eof - -parseUncalC14 :: P.Parser UncalC14 -parseUncalC14 = do - P.try long P.<|> short - where - long = do - name <- P.many (P.noneOf ",") - _ <- P.oneOf "," - mean <- read <$> P.many1 P.digit - _ <- P.oneOf "," - std <- read <$> P.many1 P.digit - return (UncalC14 name mean std) - short = do - mean <- read <$> P.many1 P.digit - _ <- P.oneOf "," - std <- read <$> P.many1 P.digit - return (UncalC14 "unknownSampleName" mean std) - -- CalCurve writeCalCurve :: FilePath -> CalCurveBCAD -> IO () writeCalCurve path calCurve = @@ -416,11 +513,11 @@ writeCalCurve path calCurve = renderCalCurve :: CalCurveBCAD -> String renderCalCurve (CalCurveBCAD cals uncals sigmas) = - let header = "calBCAD,uncalBCAD,Sigma\n" + let header = "calYearBCAD\tuncalYearBCAD\tsigma\n" body = map makeRow $ VU.toList $ VU.zip3 cals uncals sigmas in header ++ intercalate "\n" body where - makeRow (x,y,z) = show x ++ "," ++ show y ++ "," ++ show z + makeRow (x,y,z) = show x ++ "\t" ++ show y ++ "\t" ++ show z -- | Read a calibration curve file. The file must adhere to the current version of the -- .c14 file format (e.g. [here](http://intcal.org/curves/intcal20.14c)). Look @@ -446,11 +543,11 @@ parseCalCurve = do parseCalCurveLine :: P.Parser (YearBP, YearBP, YearRange) parseCalCurveLine = do - calBP <- read <$> P.many1 P.digit + calBP <- parseWord _ <- P.oneOf "," - bp <- read <$> P.many1 P.digit + bp <- parseWord _ <- P.oneOf "," - sigma <- read <$> P.many1 P.digit + sigma <- parseWord return (calBP, bp, sigma) comments :: P.Parser String @@ -458,3 +555,46 @@ comments = do _ <- P.string "#" _ <- P.manyTill P.anyChar P.newline return "" + +-- RandomAgeSamples +-- | Write 'RandomAgeSamples's to the file system. The output file is a long .tsv file with the following structure: +-- +-- @ +-- id yearBCAD +-- ... +-- Sample1 -1221 +-- Sample1 -1211 +-- Sample1 -1230 +-- Sample1 -1225 +-- ... +-- Sample2 -3763 +-- Sample2 -3788 +-- Sample2 -3767 +-- Sample2 -3774 +-- ... +-- @ +-- +writeRandomAgeSamples :: FilePath -> [RandomAgeSample] -> IO () +writeRandomAgeSamples path calPDFs = + writeFile path $ + "id\tyearBCAD\n" + ++ renderRandomAgeSamples calPDFs + +writeRandomAgeSample :: FilePath -> RandomAgeSample -> IO () +writeRandomAgeSample path calPDF = + writeFile path $ + "id\tyearBCAD\n" + ++ renderRandomAgeSample calPDF + +appendRandomAgeSample :: FilePath -> RandomAgeSample -> IO () +appendRandomAgeSample path calPDF = + appendFile path $ renderRandomAgeSample calPDF + +renderRandomAgeSamples :: [RandomAgeSample] -> String +renderRandomAgeSamples = concatMap renderRandomAgeSample + +renderRandomAgeSample :: RandomAgeSample -> String +renderRandomAgeSample (RandomAgeSample name samples) = + concatMap makeRow $ VU.toList samples + where + makeRow x = name ++ "\t" ++ show x ++ "\n" diff --git a/src/Currycarbon/SumCalibration.hs b/src/Currycarbon/SumCalibration.hs index 0637a21..83ffe29 100644 --- a/src/Currycarbon/SumCalibration.hs +++ b/src/Currycarbon/SumCalibration.hs @@ -12,6 +12,12 @@ import Data.List (groupBy, sortBy) import Data.Ord (comparing) import qualified Data.Vector.Unboxed as VU +evalNamedCalExpr :: CalibrateDatesConf -> CalCurveBP -> NamedCalExpr -> Either CurrycarbonException CalPDF +evalNamedCalExpr conf curve (NamedCalExpr exprID expr) = + case evalCalExpr conf curve expr of + Left err -> Left err + Right calPDF -> Right calPDF { _calPDFid = exprID } + -- | Evaluate a dating expression by calibrating the individual dates and forming the respective -- sums and products of post-calibration density distributions evalCalExpr :: CalibrateDatesConf -> CalCurveBP -> CalExpr -> Either CurrycarbonException CalPDF @@ -19,6 +25,8 @@ evalCalExpr conf curve calExpr = mapEither id normalizeCalPDF $ evalE calExpr where evalE :: CalExpr -> Either CurrycarbonException CalPDF evalE (UnCalDate a) = calibrateDate conf curve a + evalE (WindowBP a) = Right $ windowBP2CalPDF a + evalE (WindowBCAD a) = Right $ windowBCAD2CalPDF a evalE (CalDate a) = Right a evalE (SumCal a b) = eitherCombinePDFs (+) 0 (evalE a) (evalE b) evalE (ProductCal a b) = mapEither id normalizeCalPDF $ eitherCombinePDFs (*) 1 @@ -37,11 +45,11 @@ eitherCombinePDFs _ _ (Left e) _ = Left e eitherCombinePDFs _ _ _ (Left e) = Left e eitherCombinePDFs f initVal (Right a) (Right b) = Right $ combinePDFs f initVal a b --- | Add two probabilty densities +-- | Add two probability densities addPDFs :: CalPDF -> CalPDF -> CalPDF addPDFs = combinePDFs (+) 0 --- | Multiply two probabilty densities +-- | Multiply two probability densities multiplyPDFs :: CalPDF -> CalPDF -> CalPDF multiplyPDFs = combinePDFs (*) 1 @@ -59,7 +67,7 @@ combinePDFs f initVal (CalPDF name1 cals1 dens1) (CalPDF name2 cals2 dens2) = pdfSorted = sortBy (comparing fst) (c1 ++ c2) pdfGrouped = groupBy (\a b -> fst a == fst b) pdfSorted pdfRes = map foldYearGroup pdfGrouped - in CalPDF (name1 ++ ":" ++ name2) (VU.fromList $ map fst pdfRes) (VU.fromList $ map snd pdfRes) + in CalPDF (name1 ++ ";" ++ name2) (VU.fromList $ map fst pdfRes) (VU.fromList $ map snd pdfRes) where getMiss :: YearBCAD -> YearBCAD -> YearBCAD -> YearBCAD -> [YearBCAD] getMiss a1 a2 b1 b2 @@ -69,3 +77,14 @@ combinePDFs f initVal (CalPDF name1 cals1 dens1) (CalPDF name2 cals2 dens2) = | otherwise = [] foldYearGroup :: [(YearBCAD, Float)] -> (YearBCAD, Float) foldYearGroup oneYear = (fst $ head oneYear, foldl' f initVal $ map snd oneYear) + +-- | Create pseudo-CalPDF from RangeBCAD +windowBCAD2CalPDF :: TimeWindowBCAD -> CalPDF +windowBCAD2CalPDF (TimeWindowBCAD name start stop) = + let years = VU.fromList $ [start..stop] + dens = VU.replicate (VU.length years) 1 + in CalPDF name years dens + +windowBP2CalPDF :: TimeWindowBP -> CalPDF +windowBP2CalPDF (TimeWindowBP name start stop) = + windowBCAD2CalPDF (TimeWindowBCAD name (bp2BCAD start) (bp2BCAD stop)) diff --git a/src/Currycarbon/Types.hs b/src/Currycarbon/Types.hs index fa261dc..6bb69d0 100644 --- a/src/Currycarbon/Types.hs +++ b/src/Currycarbon/Types.hs @@ -110,15 +110,31 @@ data CalPDF = CalPDF { , _calPDFDens :: VU.Vector Float } deriving (Show, Eq) +-- | A data type for named calibration expressions +data NamedCalExpr = NamedCalExpr { + -- | Expression identifier + _exprID :: String + -- | Expression + , _expr :: CalExpr + } deriving (Show, Eq) + -- | A data type to represent an expression for sum- or product calibration data CalExpr = UnCalDate UncalC14 + | WindowBP TimeWindowBP + | WindowBCAD TimeWindowBCAD | CalDate CalPDF | SumCal CalExpr CalExpr | ProductCal CalExpr CalExpr - deriving Show + deriving (Show, Eq) -- http://www.cse.chalmers.se/edu/year/2018/course/TDA452/lectures/RecursiveDataTypes.html +data TimeWindowBP = TimeWindowBP String YearBP YearBP + deriving (Show, Eq) + +data TimeWindowBCAD = TimeWindowBCAD String YearBCAD YearBCAD + deriving (Show, Eq) + -- | A data type to represent a human readable summary of a calibrated radiocarbon date data CalC14 = CalC14 { -- | Identifier, e.g. a lab number @@ -148,7 +164,7 @@ data CalRangeSummary = CalRangeSummary { -- | A data type to represent a high density region of a probability distribution. -- A high density region is here defined as an age range, within which the respective --- cummulative probability (e.g. of an calibrated radiocarbon date density curve) +-- cumulative probability (e.g. of an calibrated radiocarbon date density curve) -- is above a certain threshold data HDR = HDR { -- | Start of the high density region in years calBCAD @@ -156,3 +172,11 @@ data HDR = HDR { -- | End of the high density region in years calBCAD , _hdrstop :: YearBCAD } deriving (Show, Eq) + +-- | A data type to store random samples drawn from a calPDF +data RandomAgeSample = RandomAgeSample { + -- | Identifier + _rasId :: String + -- | Random samples + , _rasSamples :: VU.Vector YearBCAD + } deriving Show diff --git a/src/Currycarbon/Utils.hs b/src/Currycarbon/Utils.hs index b9fe8b2..193c4c0 100644 --- a/src/Currycarbon/Utils.hs +++ b/src/Currycarbon/Utils.hs @@ -9,7 +9,8 @@ import Control.Exception (Exception) data CurrycarbonException = CurrycarbonCLIParsingException String -- ^ An exception to describe an issue in the currycarbon CLI input parsing | CurrycarbonCalibrationRangeException String -- ^ An exection to describe the case that a - -- date is not in the range of the supplied calibration curve + -- date is not in the range of the supplied calibration curve + | CurrycarbonCLIException String deriving (Show) instance Exception CurrycarbonException @@ -19,3 +20,6 @@ renderCurrycarbonException (CurrycarbonCLIParsingException s) = " Error: Input can not be parsed\n" ++ s renderCurrycarbonException (CurrycarbonCalibrationRangeException s) = s ++ " Error: Date outside of calibration range" +renderCurrycarbonException (CurrycarbonCLIException s) = + " Error: " ++ s + diff --git a/stack.yaml b/stack.yaml index 8319125..4a75105 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,4 @@ -resolver: lts-20.24 +resolver: lts-21.17 packages: - . diff --git a/stack.yaml.lock b/stack.yaml.lock index 84da166..65aa9a9 100644 --- a/stack.yaml.lock +++ b/stack.yaml.lock @@ -6,7 +6,7 @@ packages: [] snapshots: - completed: - sha256: e019cd29e3f7f9dbad500225829a3f7a50f73c674614f2f452e21bb8bf5d99ea - size: 650253 - url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/20/24.yaml - original: lts-20.24 + sha256: 85d2382958c178491d3fe50d770a624621f5ab456beef7d31ac7521f780c9bc7 + size: 640042 + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/21/17.yaml + original: lts-21.17 diff --git a/test/GoldenSpec.hs b/test/GoldenSpec.hs new file mode 100644 index 0000000..f9e9e86 --- /dev/null +++ b/test/GoldenSpec.hs @@ -0,0 +1,61 @@ +module GoldenSpec (spec) where + +import Control.Applicative +import Control.Monad +import System.IO +import System.Process +import Test.Hspec (Spec, describe, it, shouldBe) + +spec :: Spec +spec = goldenTest + +goldenTest :: Spec +goldenTest = + + describe "currycarbon cli test" $ do + + let stdout_stderr_tests = [ + "single_radiocarbon_date" + ] + + runStdoutStderrTests stdout_stderr_tests + + let file_output_tests = [ + "density_file" + , "hdr_file" + , "samples_file" + , "cal_curve_seg_file" + ] + + runFileOutputTests file_output_tests + +runStdoutStderrTests :: [String] -> Spec +runStdoutStderrTests tests = do + forM_ tests $ \test -> do + it (test ++ " should yield the correct stdout stderr output") $ do + let cp = (shell ("bash " ++ test ++ ".sh")) { + cwd = Just "test/golden", + std_out = CreatePipe, + std_err = CreatePipe + } + (_, Just out, Just err, _) <- createProcess cp + hSetBuffering out NoBuffering + hSetBuffering err NoBuffering + outActually <- liftA2 (++) (hGetContents err) (hGetContents out) + outExpected <- readFile $ "test/golden/expected_data/" ++ test ++ ".out" + outActually `shouldBe` outExpected + +runFileOutputTests :: [String] -> Spec +runFileOutputTests tests = do + forM_ tests $ \(test) -> do + it (test ++ " should produce the correct output file") $ do + let cp = (shell ("bash " ++ test ++ ".sh")) { + cwd = Just "test/golden", + std_out = CreatePipe, + std_err = CreatePipe + } + (_, _, _, exitCode) <- createProcess cp + _ <- waitForProcess exitCode + outActually <- readFile ("test/golden/actual_data/" ++ test ++ ".tsv") + outExpected <- readFile ("test/golden/expected_data/" ++ test ++ ".tsv") + outActually `shouldBe` outExpected diff --git a/test/ParserSpec.hs b/test/ParserSpec.hs new file mode 100644 index 0000000..b55efe0 --- /dev/null +++ b/test/ParserSpec.hs @@ -0,0 +1,117 @@ +module ParserSpec (spec) where + +import Currycarbon.Parsers +import Currycarbon.Types + +import Test.Hspec (Spec, describe, it, shouldBe) + +spec :: Spec +spec = do + testReadNamedExpression + +uncalC14N :: String -> CalExpr +uncalC14N s = UnCalDate (UncalC14 s 3000 30) +windowBPN :: String -> CalExpr +windowBPN s = WindowBP (TimeWindowBP s 3000 2000) +windowBCADN :: String -> CalExpr +windowBCADN s = WindowBCAD (TimeWindowBCAD s (-1050) (-50)) +uncalC14 :: CalExpr +uncalC14 = uncalC14N "" +windowBP :: CalExpr +windowBP = windowBPN "" +windowBCAD :: CalExpr +windowBCAD = windowBCADN "" + +testReadNamedExpression :: Spec +testReadNamedExpression = + describe "Currycarbon.Parsers.readOneNamedCalExpr" $ do + it "should read uncalibrated C14 dates correctly" $ do + readOneNamedCalExpr "3000,30" + `shouldBe` + Right (NamedCalExpr "" uncalC14) + readOneNamedCalExpr "uncalC14(3000,30)" + `shouldBe` + Right (NamedCalExpr "" uncalC14) + readOneNamedCalExpr "uncalC14(test,3000,30)" + `shouldBe` + Right (NamedCalExpr "" (uncalC14N "test")) + it "should read named function arguments correctly" $ do + readOneNamedCalExpr "uncalC14(id = test, yearBP = 3000, sigma = 30)" + `shouldBe` + Right (NamedCalExpr "" (uncalC14N "test")) + it "should read partially named function arguments correctly" $ do + readOneNamedCalExpr "uncalC14(3000,sigma=30)" + `shouldBe` + Right (NamedCalExpr "" uncalC14) + it "should read time windows correctly" $ do + readOneNamedCalExpr "rangeBP(3000,2000)" + `shouldBe` + Right (NamedCalExpr "" windowBP) + readOneNamedCalExpr "rangeBCAD(-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" windowBCAD) + readOneNamedCalExpr "rangeBP(test,3000,2000)" + `shouldBe` + Right (NamedCalExpr "" (windowBPN "test")) + readOneNamedCalExpr "rangeBCAD(test,-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" (windowBCADN "test")) + it "should read sums with + operator correctly " $ do + readOneNamedCalExpr "uncalC14(3000,30) + rangeBP(3000,2000)" + `shouldBe` + Right (NamedCalExpr "" $ SumCal uncalC14 windowBP) + readOneNamedCalExpr "uncalC14(3000,30) + rangeBP(3000,2000) + rangeBCAD(-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" $ SumCal uncalC14 (SumCal windowBP windowBCAD)) + readOneNamedCalExpr "uncalC14(3000,30) + rangeBP(3000,2000) + rangeBCAD(-1050,-50) + uncalC14(3000,30)" + `shouldBe` + Right (NamedCalExpr "" $ SumCal uncalC14 (SumCal windowBP (SumCal windowBCAD uncalC14))) + it "should read sums with sum() function and + operator correctly " $ do + readOneNamedCalExpr "sum(uncalC14(3000,30), rangeBP(3000,2000))" + `shouldBe` + Right (NamedCalExpr "" $ SumCal uncalC14 windowBP) + readOneNamedCalExpr "sum(uncalC14(3000,30), rangeBP(3000,2000)) + rangeBCAD(-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" $ SumCal (SumCal uncalC14 windowBP) windowBCAD) + readOneNamedCalExpr "uncalC14(3000,30) + sum(rangeBP(3000,2000), rangeBCAD(-1050,-50)) + uncalC14(3000,30)" + `shouldBe` + Right (NamedCalExpr "" $ SumCal uncalC14 (SumCal (SumCal windowBP windowBCAD) uncalC14)) + it "should read products with * operator correctly " $ do + readOneNamedCalExpr "uncalC14(3000,30) * rangeBP(3000,2000)" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal uncalC14 windowBP) + readOneNamedCalExpr "uncalC14(3000,30) * rangeBP(3000,2000) * rangeBCAD(-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal uncalC14 (ProductCal windowBP windowBCAD)) + readOneNamedCalExpr "uncalC14(3000,30) * rangeBP(3000,2000) * rangeBCAD(-1050,-50) * uncalC14(3000,30)" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal uncalC14 (ProductCal windowBP (ProductCal windowBCAD uncalC14))) + it "should read products with product() function and * operator correctly " $ do + readOneNamedCalExpr "product(uncalC14(3000,30), rangeBP(3000,2000))" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal uncalC14 windowBP) + readOneNamedCalExpr "product(uncalC14(3000,30), rangeBP(3000,2000)) * rangeBCAD(-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal (ProductCal uncalC14 windowBP) windowBCAD) + readOneNamedCalExpr "uncalC14(3000,30) * product(rangeBP(3000,2000), rangeBCAD(-1050,-50)) * uncalC14(3000,30)" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal uncalC14 (ProductCal (ProductCal windowBP windowBCAD) uncalC14)) + it "should understand parenthesis correctly" $ do + readOneNamedCalExpr "(uncalC14(3000,30) + rangeBP(3000,2000)) * rangeBCAD(-1050,-50)" + `shouldBe` + Right (NamedCalExpr "" $ ProductCal (SumCal uncalC14 windowBP) windowBCAD) + it "should read unnamed and named calibration expressions correctly" $ do + readOneNamedCalExpr "test: 3000,30" + `shouldBe` + Right (NamedCalExpr "test" uncalC14) + readOneNamedCalExpr "calExpr(test,3000,30)" + `shouldBe` + Right (NamedCalExpr "test" uncalC14) + readOneNamedCalExpr "calExpr(3000,30)" + `shouldBe` + Right (NamedCalExpr "" uncalC14) + it "should be able to handle complex, nested queries" $ do + readOneNamedCalExpr "calExpr(id = test, sum(uncalC14(3000,30), product(rangeBP(3000,2000), rangeBCAD(-1050,-50))) * uncalC14(3000,30))" + `shouldBe` + Right (NamedCalExpr "test" $ ProductCal (SumCal uncalC14 (ProductCal windowBP windowBCAD)) uncalC14) + diff --git a/test/Spec.hs b/test/Spec.hs new file mode 100644 index 0000000..a824f8c --- /dev/null +++ b/test/Spec.hs @@ -0,0 +1 @@ +{-# OPTIONS_GHC -F -pgmF hspec-discover #-} diff --git a/test/golden/actual_data/cal_curve_seg_file.tsv b/test/golden/actual_data/cal_curve_seg_file.tsv new file mode 100644 index 0000000..0bf79a8 --- /dev/null +++ b/test/golden/actual_data/cal_curve_seg_file.tsv @@ -0,0 +1,503 @@ +calYearBCAD uncalYearBCAD sigma +-1441 -1229 17 +-1440 -1227 17 +-1439 -1226 17 +-1438 -1224 17 +-1437 -1222 17 +-1436 -1221 17 +-1435 -1219 16 +-1434 -1218 16 +-1433 -1217 16 +-1432 -1215 16 +-1431 -1214 16 +-1430 -1212 16 +-1429 -1210 17 +-1428 -1209 17 +-1427 -1207 18 +-1426 -1205 18 +-1425 -1204 19 +-1424 -1202 19 +-1423 -1200 19 +-1422 -1198 19 +-1421 -1196 19 +-1420 -1193 19 +-1419 -1191 19 +-1418 -1189 18 +-1417 -1186 18 +-1416 -1183 17 +-1415 -1181 17 +-1414 -1178 17 +-1413 -1176 17 +-1412 -1173 17 +-1411 -1171 17 +-1410 -1168 17 +-1409 -1166 18 +-1408 -1164 18 +-1407 -1162 18 +-1406 -1160 18 +-1405 -1158 18 +-1404 -1157 18 +-1403 -1155 18 +-1402 -1154 17 +-1401 -1153 17 +-1400 -1152 17 +-1399 -1151 16 +-1398 -1150 16 +-1397 -1149 16 +-1396 -1148 16 +-1395 -1147 17 +-1394 -1146 17 +-1393 -1144 17 +-1392 -1143 18 +-1391 -1142 18 +-1390 -1140 18 +-1389 -1139 18 +-1388 -1137 17 +-1387 -1135 17 +-1386 -1133 17 +-1385 -1131 17 +-1384 -1129 17 +-1383 -1127 17 +-1382 -1125 17 +-1381 -1123 18 +-1380 -1120 18 +-1379 -1118 19 +-1378 -1116 19 +-1377 -1114 19 +-1376 -1112 19 +-1375 -1110 19 +-1374 -1108 19 +-1373 -1106 19 +-1372 -1104 18 +-1371 -1102 18 +-1370 -1101 18 +-1369 -1099 17 +-1368 -1098 17 +-1367 -1097 18 +-1366 -1096 18 +-1365 -1095 18 +-1364 -1094 19 +-1363 -1094 19 +-1362 -1094 19 +-1361 -1094 19 +-1360 -1094 19 +-1359 -1095 19 +-1358 -1096 18 +-1357 -1097 18 +-1356 -1098 17 +-1355 -1099 17 +-1354 -1101 16 +-1353 -1102 16 +-1352 -1104 16 +-1351 -1106 16 +-1350 -1107 16 +-1349 -1109 16 +-1348 -1110 16 +-1347 -1112 16 +-1346 -1113 16 +-1345 -1115 16 +-1344 -1116 16 +-1343 -1118 16 +-1342 -1119 16 +-1341 -1121 16 +-1340 -1124 16 +-1339 -1126 16 +-1338 -1129 16 +-1337 -1133 15 +-1336 -1136 15 +-1335 -1139 15 +-1334 -1142 16 +-1333 -1145 16 +-1332 -1147 16 +-1331 -1148 15 +-1330 -1149 15 +-1329 -1150 15 +-1328 -1150 15 +-1327 -1149 15 +-1326 -1148 16 +-1325 -1147 16 +-1324 -1145 16 +-1323 -1143 16 +-1322 -1141 15 +-1321 -1138 15 +-1320 -1136 15 +-1319 -1134 15 +-1318 -1132 15 +-1317 -1131 15 +-1316 -1130 15 +-1315 -1129 15 +-1314 -1128 15 +-1313 -1127 15 +-1312 -1126 15 +-1311 -1125 15 +-1310 -1124 15 +-1309 -1124 15 +-1308 -1122 15 +-1307 -1121 15 +-1306 -1120 15 +-1305 -1118 15 +-1304 -1116 15 +-1303 -1114 15 +-1302 -1112 15 +-1301 -1109 15 +-1300 -1107 15 +-1299 -1105 15 +-1298 -1103 15 +-1297 -1102 15 +-1296 -1101 16 +-1295 -1100 16 +-1294 -1099 15 +-1293 -1098 15 +-1292 -1098 15 +-1291 -1097 15 +-1290 -1097 15 +-1289 -1096 15 +-1288 -1096 15 +-1287 -1095 14 +-1286 -1094 14 +-1285 -1094 14 +-1284 -1093 14 +-1283 -1092 14 +-1282 -1090 14 +-1281 -1089 15 +-1280 -1088 15 +-1279 -1087 15 +-1278 -1086 15 +-1277 -1085 15 +-1276 -1084 15 +-1275 -1084 15 +-1274 -1084 15 +-1273 -1083 15 +-1272 -1083 15 +-1271 -1083 15 +-1270 -1082 15 +-1269 -1082 15 +-1268 -1081 15 +-1267 -1080 15 +-1266 -1078 15 +-1265 -1076 15 +-1264 -1074 14 +-1263 -1072 15 +-1262 -1070 15 +-1261 -1067 15 +-1260 -1064 15 +-1259 -1061 15 +-1258 -1058 14 +-1257 -1055 14 +-1256 -1053 14 +-1255 -1050 14 +-1254 -1048 15 +-1253 -1047 15 +-1252 -1046 15 +-1251 -1045 15 +-1250 -1046 15 +-1249 -1047 15 +-1248 -1048 15 +-1247 -1049 15 +-1246 -1051 15 +-1245 -1052 15 +-1244 -1054 15 +-1243 -1056 15 +-1242 -1057 15 +-1241 -1059 15 +-1240 -1060 15 +-1239 -1061 15 +-1238 -1061 15 +-1237 -1061 15 +-1236 -1061 15 +-1235 -1061 15 +-1234 -1060 15 +-1233 -1059 15 +-1232 -1058 15 +-1231 -1057 15 +-1230 -1056 15 +-1229 -1055 15 +-1228 -1054 15 +-1227 -1053 15 +-1226 -1051 15 +-1225 -1050 15 +-1224 -1049 15 +-1223 -1048 15 +-1222 -1046 15 +-1221 -1044 15 +-1220 -1042 15 +-1219 -1040 15 +-1218 -1037 15 +-1217 -1034 15 +-1216 -1032 15 +-1215 -1029 15 +-1214 -1026 16 +-1213 -1024 16 +-1212 -1021 17 +-1211 -1019 17 +-1210 -1018 18 +-1209 -1016 18 +-1208 -1015 18 +-1207 -1014 18 +-1206 -1014 18 +-1205 -1013 18 +-1204 -1013 19 +-1203 -1012 19 +-1202 -1011 19 +-1201 -1011 20 +-1200 -1010 20 +-1199 -1009 20 +-1198 -1007 20 +-1197 -1006 20 +-1196 -1004 19 +-1195 -1003 19 +-1194 -1001 18 +-1193 -999 18 +-1192 -998 18 +-1191 -996 17 +-1190 -995 17 +-1189 -993 18 +-1188 -992 18 +-1187 -991 18 +-1186 -990 19 +-1185 -989 19 +-1184 -988 20 +-1183 -988 20 +-1182 -988 20 +-1181 -989 20 +-1180 -989 20 +-1179 -991 20 +-1178 -992 20 +-1177 -994 19 +-1176 -995 19 +-1175 -997 19 +-1174 -999 18 +-1173 -1001 18 +-1172 -1003 18 +-1171 -1005 18 +-1170 -1006 18 +-1169 -1007 19 +-1168 -1008 19 +-1167 -1009 19 +-1166 -1009 19 +-1165 -1009 18 +-1164 -1008 18 +-1163 -1007 17 +-1162 -1005 17 +-1161 -1003 16 +-1160 -1001 16 +-1159 -999 16 +-1158 -997 16 +-1157 -995 16 +-1156 -993 16 +-1155 -990 15 +-1154 -988 15 +-1153 -987 15 +-1152 -985 14 +-1151 -984 14 +-1150 -984 15 +-1149 -984 15 +-1148 -985 15 +-1147 -987 15 +-1146 -989 15 +-1145 -993 14 +-1144 -996 14 +-1143 -1000 14 +-1142 -1003 15 +-1141 -1006 15 +-1140 -1009 15 +-1139 -1011 15 +-1138 -1013 15 +-1137 -1014 15 +-1136 -1014 14 +-1135 -1014 14 +-1134 -1013 15 +-1133 -1012 15 +-1132 -1010 15 +-1131 -1008 15 +-1130 -1005 15 +-1129 -1002 15 +-1128 -999 15 +-1127 -996 15 +-1126 -992 15 +-1125 -989 15 +-1124 -985 15 +-1123 -982 15 +-1122 -979 16 +-1121 -976 16 +-1120 -973 15 +-1119 -971 15 +-1118 -969 15 +-1117 -967 15 +-1116 -965 15 +-1115 -963 15 +-1114 -962 15 +-1113 -960 15 +-1112 -959 15 +-1111 -957 15 +-1110 -956 15 +-1109 -954 15 +-1108 -953 15 +-1107 -951 16 +-1106 -950 16 +-1105 -949 16 +-1104 -948 17 +-1103 -948 17 +-1102 -947 18 +-1101 -947 18 +-1100 -947 18 +-1099 -948 18 +-1098 -948 17 +-1097 -949 17 +-1096 -950 17 +-1095 -950 16 +-1094 -951 16 +-1093 -952 16 +-1092 -953 17 +-1091 -953 17 +-1090 -954 17 +-1089 -955 18 +-1088 -955 18 +-1087 -955 18 +-1086 -955 18 +-1085 -954 18 +-1084 -954 18 +-1083 -953 18 +-1082 -953 17 +-1081 -952 17 +-1080 -951 17 +-1079 -950 17 +-1078 -949 17 +-1077 -949 17 +-1076 -948 18 +-1075 -947 18 +-1074 -947 18 +-1073 -947 18 +-1072 -947 18 +-1071 -947 18 +-1070 -948 18 +-1069 -949 17 +-1068 -950 17 +-1067 -951 17 +-1066 -952 17 +-1065 -953 17 +-1064 -954 17 +-1063 -954 17 +-1062 -955 17 +-1061 -956 17 +-1060 -956 18 +-1059 -956 18 +-1058 -955 18 +-1057 -954 18 +-1056 -953 18 +-1055 -952 18 +-1054 -950 18 +-1053 -947 17 +-1052 -945 17 +-1051 -942 16 +-1050 -939 16 +-1049 -936 16 +-1048 -933 16 +-1047 -930 16 +-1046 -927 16 +-1045 -925 17 +-1044 -922 17 +-1043 -920 17 +-1042 -918 18 +-1041 -917 18 +-1040 -916 18 +-1039 -915 18 +-1038 -915 18 +-1037 -915 18 +-1036 -916 18 +-1035 -917 17 +-1034 -918 17 +-1033 -919 17 +-1032 -920 16 +-1031 -922 16 +-1030 -923 16 +-1029 -924 16 +-1028 -926 17 +-1027 -927 17 +-1026 -928 17 +-1025 -929 17 +-1024 -930 18 +-1023 -930 18 +-1022 -930 18 +-1021 -929 18 +-1020 -928 18 +-1019 -927 18 +-1018 -925 17 +-1017 -923 17 +-1016 -921 16 +-1015 -918 16 +-1014 -915 16 +-1013 -912 15 +-1012 -909 15 +-1011 -906 15 +-1010 -902 15 +-1009 -899 16 +-1008 -896 16 +-1007 -893 16 +-1006 -890 17 +-1005 -887 17 +-1004 -885 17 +-1003 -883 17 +-1002 -881 17 +-1001 -880 17 +-1000 -879 17 +-999 -878 17 +-998 -878 16 +-997 -877 16 +-996 -877 16 +-995 -877 16 +-994 -877 17 +-993 -877 17 +-992 -877 17 +-991 -877 18 +-990 -877 18 +-989 -877 18 +-988 -876 18 +-987 -876 18 +-986 -875 18 +-985 -875 17 +-984 -874 17 +-983 -873 16 +-982 -872 16 +-981 -871 16 +-980 -869 16 +-979 -868 16 +-978 -867 16 +-977 -866 16 +-976 -864 17 +-975 -863 17 +-974 -862 17 +-973 -860 17 +-972 -859 17 +-971 -858 17 +-970 -857 17 +-969 -856 16 +-968 -855 16 +-967 -855 16 +-966 -854 16 +-965 -853 16 +-964 -853 16 +-963 -853 17 +-962 -853 17 +-961 -853 17 +-960 -853 18 +-959 -854 18 +-958 -854 18 +-957 -855 18 +-956 -856 18 +-955 -857 18 +-954 -859 17 +-953 -860 17 +-952 -862 16 +-951 -863 16 +-950 -865 16 +-949 -866 16 +-948 -867 16 +-947 -869 16 +-946 -870 16 +-945 -871 16 +-944 -872 17 +-943 -872 17 +-942 -872 17 +-941 -872 17 +-940 -872 18 \ No newline at end of file diff --git a/test/golden/actual_data/density_file.tsv b/test/golden/actual_data/density_file.tsv new file mode 100644 index 0000000..fe33569 --- /dev/null +++ b/test/golden/actual_data/density_file.tsv @@ -0,0 +1,399 @@ +id yearBCAD density +1 -1414 1.2485408e-5 +1 -1413 1.5097789e-5 +1 -1412 1.9989839e-5 +1 -1411 2.4033061e-5 +1 -1410 3.154145e-5 +1 -1409 4.366613e-5 +1 -1408 5.182399e-5 +1 -1407 6.135777e-5 +1 -1406 7.2468654e-5 +1 -1405 8.5380896e-5 +1 -1404 9.258939e-5 +1 -1403 1.08679174e-4 +1 -1402 1.0427339e-4 +1 -1401 1.130317e-4 +1 -1400 1.2244577e-4 +1 -1399 1.1830085e-4 +1 -1398 1.2825013e-4 +1 -1397 1.389422e-4 +1 -1396 1.504234e-4 +1 -1395 1.8086852e-4 +1 -1394 1.9515397e-4 +1 -1393 2.2673856e-4 +1 -1392 2.6929172e-4 +1 -1391 2.8920497e-4 +1 -1390 3.3288536e-4 +1 -1389 3.56778e-4 +1 -1388 3.7510364e-4 +1 -1387 4.3041562e-4 +1 -1386 4.9248507e-4 +1 -1385 5.6189613e-4 +1 -1384 6.392443e-4 +1 -1383 7.2512927e-4 +1 -1382 8.201471e-4 +1 -1381 9.840226e-4 +1 -1380 1.1662486e-3 +1 -1379 1.3735098e-3 +1 -1378 1.5234945e-3 +1 -1377 1.6849806e-3 +1 -1376 1.8581729e-3 +1 -1375 2.0431816e-3 +1 -1374 2.2400115e-3 +1 -1373 2.448548e-3 +1 -1372 2.5780778e-3 +1 -1371 2.8082137e-3 +1 -1370 2.927483e-3 +1 -1369 3.084582e-3 +1 -1368 3.2117483e-3 +1 -1367 3.4306804e-3 +1 -1366 3.5625e-3 +1 -1365 3.6964875e-3 +1 -1364 3.921976e-3 +1 -1363 3.921976e-3 +1 -1362 3.921976e-3 +1 -1361 3.921976e-3 +1 -1360 3.921976e-3 +1 -1359 3.786735e-3 +1 -1358 3.5625e-3 +1 -1357 3.4306804e-3 +1 -1356 3.2117483e-3 +1 -1355 3.084582e-3 +1 -1354 2.7521139e-3 +1 -1353 2.6336822e-3 +1 -1352 2.4060064e-3 +1 -1351 2.1909003e-3 +1 -1350 2.0881426e-3 +1 -1349 1.8922987e-3 +1 -1348 1.7992173e-3 +1 -1347 1.6226898e-3 +1 -1346 1.5392012e-3 +1 -1345 1.3816209e-3 +1 -1344 1.3074493e-3 +1 -1343 1.1681007e-3 +1 -1342 1.1028139e-3 +1 -1341 9.807099e-4 +1 -1340 8.1771566e-4 +1 -1339 7.2165084e-4 +1 -1338 5.9492886e-4 +1 -1337 4.2088976e-4 +1 -1336 3.398045e-4 +1 -1335 2.7252332e-4 +1 -1334 2.3873866e-4 +1 -1333 1.9009615e-4 +1 -1332 1.6274204e-4 +1 -1331 1.3520932e-4 +1 -1330 1.2463809e-4 +1 -1329 1.1481369e-4 +1 -1328 1.1481369e-4 +1 -1327 1.2463809e-4 +1 -1326 1.504234e-4 +1 -1325 1.6274204e-4 +1 -1324 1.9009615e-4 +1 -1323 2.2143334e-4 +1 -1322 2.3438824e-4 +1 -1321 2.9353763e-4 +1 -1320 3.398045e-4 +1 -1319 3.9220374e-4 +1 -1318 4.51336e-4 +1 -1317 4.836211e-4 +1 -1316 5.178245e-4 +1 -1315 5.540268e-4 +1 -1314 5.9230917e-4 +1 -1313 6.327529e-4 +1 -1312 6.754396e-4 +1 -1311 7.204508e-4 +1 -1310 7.6786714e-4 +1 -1309 7.6786714e-4 +1 -1308 8.70235e-4 +1 -1307 9.2534255e-4 +1 -1306 9.831673e-4 +1 -1305 1.1072573e-3 +1 -1304 1.243054e-3 +1 -1303 1.3910474e-3 +1 -1302 1.5516536e-3 +1 -1301 1.8168977e-3 +1 -1300 2.010228e-3 +1 -1299 2.2168152e-3 +1 -1298 2.4365452e-3 +1 -1297 2.551259e-3 +1 -1296 2.7521139e-3 +1 -1295 2.8735236e-3 +1 -1294 2.9141635e-3 +1 -1293 3.0411426e-3 +1 -1292 3.0411426e-3 +1 -1291 3.170975e-3 +1 -1290 3.170975e-3 +1 -1289 3.303553e-3 +1 -1288 3.303553e-3 +1 -1287 3.358479e-3 +1 -1286 3.4965212e-3 +1 -1285 3.4965212e-3 +1 -1284 3.6370563e-3 +1 -1283 3.7799273e-3 +1 -1282 4.071976e-3 +1 -1281 4.296666e-3 +1 -1280 4.4457754e-3 +1 -1279 4.5960867e-3 +1 -1278 4.747369e-3 +1 -1277 4.8993793e-3 +1 -1276 5.0518652e-3 +1 -1275 5.0518652e-3 +1 -1274 5.0518652e-3 +1 -1273 5.204564e-3 +1 -1272 5.204564e-3 +1 -1271 5.204564e-3 +1 -1270 5.3572045e-3 +1 -1269 5.3572045e-3 +1 -1268 5.509506e-3 +1 -1267 5.661181e-3 +1 -1266 5.9614684e-3 +1 -1265 6.255645e-3 +1 -1264 6.4968583e-3 +1 -1263 6.815701e-3 +1 -1262 7.0765605e-3 +1 -1261 7.436963e-3 +1 -1260 7.7533075e-3 +1 -1259 8.018399e-3 +1 -1258 8.219791e-3 +1 -1257 8.368772e-3 +1 -1256 8.430688e-3 +1 -1255 8.46572e-3 +1 -1254 8.450533e-3 +1 -1253 8.431589e-3 +1 -1252 8.40514e-3 +1 -1251 8.371258e-3 +1 -1250 8.40514e-3 +1 -1249 8.431589e-3 +1 -1248 8.450533e-3 +1 -1247 8.4619215e-3 +1 -1246 8.4619215e-3 +1 -1245 8.450533e-3 +1 -1244 8.40514e-3 +1 -1243 8.330035e-3 +1 -1242 8.281585e-3 +1 -1241 8.1635425e-3 +1 -1240 8.094268e-3 +1 -1239 8.018399e-3 +1 -1238 8.018399e-3 +1 -1237 8.018399e-3 +1 -1236 8.018399e-3 +1 -1235 8.018399e-3 +1 -1234 8.094268e-3 +1 -1233 8.1635425e-3 +1 -1232 8.226037e-3 +1 -1231 8.281585e-3 +1 -1230 8.330035e-3 +1 -1229 8.371258e-3 +1 -1228 8.40514e-3 +1 -1227 8.431589e-3 +1 -1226 8.4619215e-3 +1 -1225 8.46572e-3 +1 -1224 8.4619215e-3 +1 -1223 8.450533e-3 +1 -1222 8.40514e-3 +1 -1221 8.330035e-3 +1 -1220 8.226037e-3 +1 -1219 8.094268e-3 +1 -1218 7.847695e-3 +1 -1217 7.547679e-3 +1 -1216 7.321346e-3 +1 -1215 6.947988e-3 +1 -1214 6.58651e-3 +1 -1213 6.3064555e-3 +1 -1212 5.930375e-3 +1 -1211 5.637854e-3 +1 -1210 5.558337e-3 +1 -1209 5.2662515e-3 +1 -1208 5.119849e-3 +1 -1207 4.9735317e-3 +1 -1206 4.9735317e-3 +1 -1205 4.8275352e-3 +1 -1204 4.907322e-3 +1 -1203 4.7636973e-3 +1 -1202 4.6206973e-3 +1 -1201 4.704902e-3 +1 -1200 4.5643696e-3 +1 -1199 4.424721e-3 +1 -1198 4.1488023e-3 +1 -1197 4.012877e-3 +1 -1196 3.653369e-3 +1 -1195 3.5220152e-3 +1 -1194 3.1740458e-3 +1 -1193 2.927483e-3 +1 -1192 2.8082137e-3 +1 -1191 2.4904548e-3 +1 -1190 2.3804714e-3 +1 -1189 2.254728e-3 +1 -1188 2.1529314e-3 +1 -1187 2.054169e-3 +1 -1186 2.0431816e-3 +1 -1185 1.9491977e-3 +1 -1184 1.943383e-3 +1 -1183 1.943383e-3 +1 -1182 1.943383e-3 +1 -1181 2.0357415e-3 +1 -1180 2.0357415e-3 +1 -1179 2.2290554e-3 +1 -1178 2.3299854e-3 +1 -1177 2.448548e-3 +1 -1176 2.5571366e-3 +1 -1175 2.7827376e-3 +1 -1174 2.927483e-3 +1 -1173 3.1740458e-3 +1 -1172 3.4306804e-3 +1 -1171 3.6964875e-3 +1 -1170 3.8325028e-3 +1 -1169 4.058939e-3 +1 -1168 4.197468e-3 +1 -1167 4.3373904e-3 +1 -1166 4.3373904e-3 +1 -1165 4.2511714e-3 +1 -1164 4.1100103e-3 +1 -1163 3.8835478e-3 +1 -1162 3.6081513e-3 +1 -1161 3.2547812e-3 +1 -1160 2.9978324e-3 +1 -1159 2.7521139e-3 +1 -1158 2.5182941e-3 +1 -1157 2.2968634e-3 +1 -1156 2.0881426e-3 +1 -1155 1.725199e-3 +1 -1154 1.5516536e-3 +1 -1153 1.4697512e-3 +1 -1152 1.253416e-3 +1 -1151 1.1826834e-3 +1 -1150 1.243054e-3 +1 -1149 1.243054e-3 +1 -1148 1.3154981e-3 +1 -1147 1.4697512e-3 +1 -1146 1.6367924e-3 +1 -1145 1.9362542e-3 +1 -1144 2.2478928e-3 +1 -1143 2.7102758e-3 +1 -1142 3.170975e-3 +1 -1141 3.5764445e-3 +1 -1140 4.002923e-3 +1 -1139 4.296666e-3 +1 -1138 4.5960867e-3 +1 -1137 4.747369e-3 +1 -1136 4.6756812e-3 +1 -1135 4.6756812e-3 +1 -1134 4.5960867e-3 +1 -1133 4.4457754e-3 +1 -1132 4.1489787e-3 +1 -1131 3.8586948e-3 +1 -1130 3.438754e-3 +1 -1129 3.0411426e-3 +1 -1128 2.6691433e-3 +1 -1127 2.3250505e-3 +1 -1126 1.9119045e-3 +1 -1125 1.6367924e-3 +1 -1124 1.3154981e-3 +1 -1123 1.1072573e-3 +1 -1122 9.807099e-4 +1 -1121 8.1771566e-4 +1 -1120 6.327529e-4 +1 -1119 5.540268e-4 +1 -1118 4.836211e-4 +1 -1117 4.2088976e-4 +1 -1116 3.6520066e-4 +1 -1115 3.1594106e-4 +1 -1114 2.9353763e-4 +1 -1113 2.5282917e-4 +1 -1112 2.3438824e-4 +1 -1111 2.0100856e-4 +1 -1110 1.8594635e-4 +1 -1109 1.5878474e-4 +1 -1108 1.4657498e-4 +1 -1107 1.389422e-4 +1 -1106 1.2825013e-4 +1 -1105 1.1830085e-4 +1 -1104 1.2244577e-4 +1 -1103 1.2244577e-4 +1 -1102 1.2724288e-4 +1 -1101 1.2724288e-4 +1 -1100 1.2724288e-4 +1 -1099 1.3755045e-4 +1 -1098 1.2244577e-4 +1 -1097 1.32557e-4 +1 -1096 1.4340869e-4 +1 -1095 1.2825013e-4 +1 -1094 1.389422e-4 +1 -1093 1.504234e-4 +1 -1092 1.8086852e-4 +1 -1091 1.8086852e-4 +1 -1090 1.9515397e-4 +1 -1089 2.330168e-4 +1 -1088 2.330168e-4 +1 -1087 2.330168e-4 +1 -1086 2.330168e-4 +1 -1085 2.1653922e-4 +1 -1084 2.1653922e-4 +1 -1083 2.0109423e-4 +1 -1082 1.8086852e-4 +1 -1081 1.6751625e-4 +1 -1080 1.5504599e-4 +1 -1079 1.4340869e-4 +1 -1078 1.32557e-4 +1 -1077 1.32557e-4 +1 -1076 1.3755045e-4 +1 -1075 1.2724288e-4 +1 -1074 1.2724288e-4 +1 -1073 1.2724288e-4 +1 -1072 1.2724288e-4 +1 -1071 1.2724288e-4 +1 -1070 1.3755045e-4 +1 -1069 1.32557e-4 +1 -1068 1.4340869e-4 +1 -1067 1.5504599e-4 +1 -1066 1.6751625e-4 +1 -1065 1.8086852e-4 +1 -1064 1.9515397e-4 +1 -1063 1.9515397e-4 +1 -1062 2.1042564e-4 +1 -1061 2.2673856e-4 +1 -1060 2.5058194e-4 +1 -1059 2.5058194e-4 +1 -1058 2.330168e-4 +1 -1057 2.1653922e-4 +1 -1056 2.0109423e-4 +1 -1055 1.8662821e-4 +1 -1054 1.6042878e-4 +1 -1053 1.130317e-4 +1 -1052 9.6131356e-5 +1 -1051 6.597864e-5 +1 -1050 5.0871124e-5 +1 -1049 3.8998478e-5 +1 -1048 2.9728637e-5 +1 -1047 2.2536955e-5 +1 -1046 1.6992273e-5 +1 -1045 1.6588087e-5 +1 -1044 1.2485408e-5 +1 -1043 1.0301709e-5 +1 -1042 1.0203481e-5 +1 -1041 9.27303e-6 +1 -1040 8.422884e-6 +1 -1039 7.6465785e-6 +1 -1038 7.6465785e-6 +1 -1037 7.6465785e-6 +1 -1036 8.422884e-6 +1 -1035 7.68871e-6 +1 -1034 8.480968e-6 +1 -1033 9.349705e-6 +1 -1032 8.6125665e-6 +1 -1031 1.04884075e-5 +1 -1030 1.1564427e-5 +1 -1029 1.2743456e-5 +1 -1028 1.8214989e-5 +1 -1027 1.9989839e-5 +1 -1026 2.192485e-5 +1 -1025 2.4033061e-5 +1 -1024 3.0780615e-5 +1 -1023 3.0780615e-5 +1 -1022 3.0780615e-5 +1 -1021 2.8162687e-5 +1 -1020 2.5752486e-5 +1 -1019 2.353499e-5 +1 -1018 1.6588087e-5 +1 -1017 1.3733515e-5 diff --git a/test/golden/actual_data/hdr_file.tsv b/test/golden/actual_data/hdr_file.tsv new file mode 100644 index 0000000..933554e --- /dev/null +++ b/test/golden/actual_data/hdr_file.tsv @@ -0,0 +1,7 @@ +id hdrSigmaLevel hdrStartYearBCAD hdrStopYearBCAD +1 1 -1364 -1360 +1 1 -1282 -1197 +1 1 -1169 -1163 +1 1 -1140 -1131 +1 2 -1379 -1344 +1 2 -1304 -1124 \ No newline at end of file diff --git a/test/golden/actual_data/samples_file.tsv b/test/golden/actual_data/samples_file.tsv new file mode 100644 index 0000000..6b7ece2 --- /dev/null +++ b/test/golden/actual_data/samples_file.tsv @@ -0,0 +1,4 @@ +id yearBCAD +1 -1262 +1 -1362 +1 -1235 diff --git a/test/golden/cal_curve_seg_file.sh b/test/golden/cal_curve_seg_file.sh new file mode 100644 index 0000000..f420e77 --- /dev/null +++ b/test/golden/cal_curve_seg_file.sh @@ -0,0 +1 @@ +currycarbon "3000,30" --calCurveSegFile actual_data/cal_curve_seg_file.tsv \ No newline at end of file diff --git a/test/golden/density_file.sh b/test/golden/density_file.sh new file mode 100644 index 0000000..7f3b615 --- /dev/null +++ b/test/golden/density_file.sh @@ -0,0 +1 @@ +currycarbon "3000,30" --densityFile actual_data/density_file.tsv \ No newline at end of file diff --git a/test/golden/expected_data/cal_curve_seg_file.tsv b/test/golden/expected_data/cal_curve_seg_file.tsv new file mode 100644 index 0000000..0bf79a8 --- /dev/null +++ b/test/golden/expected_data/cal_curve_seg_file.tsv @@ -0,0 +1,503 @@ +calYearBCAD uncalYearBCAD sigma +-1441 -1229 17 +-1440 -1227 17 +-1439 -1226 17 +-1438 -1224 17 +-1437 -1222 17 +-1436 -1221 17 +-1435 -1219 16 +-1434 -1218 16 +-1433 -1217 16 +-1432 -1215 16 +-1431 -1214 16 +-1430 -1212 16 +-1429 -1210 17 +-1428 -1209 17 +-1427 -1207 18 +-1426 -1205 18 +-1425 -1204 19 +-1424 -1202 19 +-1423 -1200 19 +-1422 -1198 19 +-1421 -1196 19 +-1420 -1193 19 +-1419 -1191 19 +-1418 -1189 18 +-1417 -1186 18 +-1416 -1183 17 +-1415 -1181 17 +-1414 -1178 17 +-1413 -1176 17 +-1412 -1173 17 +-1411 -1171 17 +-1410 -1168 17 +-1409 -1166 18 +-1408 -1164 18 +-1407 -1162 18 +-1406 -1160 18 +-1405 -1158 18 +-1404 -1157 18 +-1403 -1155 18 +-1402 -1154 17 +-1401 -1153 17 +-1400 -1152 17 +-1399 -1151 16 +-1398 -1150 16 +-1397 -1149 16 +-1396 -1148 16 +-1395 -1147 17 +-1394 -1146 17 +-1393 -1144 17 +-1392 -1143 18 +-1391 -1142 18 +-1390 -1140 18 +-1389 -1139 18 +-1388 -1137 17 +-1387 -1135 17 +-1386 -1133 17 +-1385 -1131 17 +-1384 -1129 17 +-1383 -1127 17 +-1382 -1125 17 +-1381 -1123 18 +-1380 -1120 18 +-1379 -1118 19 +-1378 -1116 19 +-1377 -1114 19 +-1376 -1112 19 +-1375 -1110 19 +-1374 -1108 19 +-1373 -1106 19 +-1372 -1104 18 +-1371 -1102 18 +-1370 -1101 18 +-1369 -1099 17 +-1368 -1098 17 +-1367 -1097 18 +-1366 -1096 18 +-1365 -1095 18 +-1364 -1094 19 +-1363 -1094 19 +-1362 -1094 19 +-1361 -1094 19 +-1360 -1094 19 +-1359 -1095 19 +-1358 -1096 18 +-1357 -1097 18 +-1356 -1098 17 +-1355 -1099 17 +-1354 -1101 16 +-1353 -1102 16 +-1352 -1104 16 +-1351 -1106 16 +-1350 -1107 16 +-1349 -1109 16 +-1348 -1110 16 +-1347 -1112 16 +-1346 -1113 16 +-1345 -1115 16 +-1344 -1116 16 +-1343 -1118 16 +-1342 -1119 16 +-1341 -1121 16 +-1340 -1124 16 +-1339 -1126 16 +-1338 -1129 16 +-1337 -1133 15 +-1336 -1136 15 +-1335 -1139 15 +-1334 -1142 16 +-1333 -1145 16 +-1332 -1147 16 +-1331 -1148 15 +-1330 -1149 15 +-1329 -1150 15 +-1328 -1150 15 +-1327 -1149 15 +-1326 -1148 16 +-1325 -1147 16 +-1324 -1145 16 +-1323 -1143 16 +-1322 -1141 15 +-1321 -1138 15 +-1320 -1136 15 +-1319 -1134 15 +-1318 -1132 15 +-1317 -1131 15 +-1316 -1130 15 +-1315 -1129 15 +-1314 -1128 15 +-1313 -1127 15 +-1312 -1126 15 +-1311 -1125 15 +-1310 -1124 15 +-1309 -1124 15 +-1308 -1122 15 +-1307 -1121 15 +-1306 -1120 15 +-1305 -1118 15 +-1304 -1116 15 +-1303 -1114 15 +-1302 -1112 15 +-1301 -1109 15 +-1300 -1107 15 +-1299 -1105 15 +-1298 -1103 15 +-1297 -1102 15 +-1296 -1101 16 +-1295 -1100 16 +-1294 -1099 15 +-1293 -1098 15 +-1292 -1098 15 +-1291 -1097 15 +-1290 -1097 15 +-1289 -1096 15 +-1288 -1096 15 +-1287 -1095 14 +-1286 -1094 14 +-1285 -1094 14 +-1284 -1093 14 +-1283 -1092 14 +-1282 -1090 14 +-1281 -1089 15 +-1280 -1088 15 +-1279 -1087 15 +-1278 -1086 15 +-1277 -1085 15 +-1276 -1084 15 +-1275 -1084 15 +-1274 -1084 15 +-1273 -1083 15 +-1272 -1083 15 +-1271 -1083 15 +-1270 -1082 15 +-1269 -1082 15 +-1268 -1081 15 +-1267 -1080 15 +-1266 -1078 15 +-1265 -1076 15 +-1264 -1074 14 +-1263 -1072 15 +-1262 -1070 15 +-1261 -1067 15 +-1260 -1064 15 +-1259 -1061 15 +-1258 -1058 14 +-1257 -1055 14 +-1256 -1053 14 +-1255 -1050 14 +-1254 -1048 15 +-1253 -1047 15 +-1252 -1046 15 +-1251 -1045 15 +-1250 -1046 15 +-1249 -1047 15 +-1248 -1048 15 +-1247 -1049 15 +-1246 -1051 15 +-1245 -1052 15 +-1244 -1054 15 +-1243 -1056 15 +-1242 -1057 15 +-1241 -1059 15 +-1240 -1060 15 +-1239 -1061 15 +-1238 -1061 15 +-1237 -1061 15 +-1236 -1061 15 +-1235 -1061 15 +-1234 -1060 15 +-1233 -1059 15 +-1232 -1058 15 +-1231 -1057 15 +-1230 -1056 15 +-1229 -1055 15 +-1228 -1054 15 +-1227 -1053 15 +-1226 -1051 15 +-1225 -1050 15 +-1224 -1049 15 +-1223 -1048 15 +-1222 -1046 15 +-1221 -1044 15 +-1220 -1042 15 +-1219 -1040 15 +-1218 -1037 15 +-1217 -1034 15 +-1216 -1032 15 +-1215 -1029 15 +-1214 -1026 16 +-1213 -1024 16 +-1212 -1021 17 +-1211 -1019 17 +-1210 -1018 18 +-1209 -1016 18 +-1208 -1015 18 +-1207 -1014 18 +-1206 -1014 18 +-1205 -1013 18 +-1204 -1013 19 +-1203 -1012 19 +-1202 -1011 19 +-1201 -1011 20 +-1200 -1010 20 +-1199 -1009 20 +-1198 -1007 20 +-1197 -1006 20 +-1196 -1004 19 +-1195 -1003 19 +-1194 -1001 18 +-1193 -999 18 +-1192 -998 18 +-1191 -996 17 +-1190 -995 17 +-1189 -993 18 +-1188 -992 18 +-1187 -991 18 +-1186 -990 19 +-1185 -989 19 +-1184 -988 20 +-1183 -988 20 +-1182 -988 20 +-1181 -989 20 +-1180 -989 20 +-1179 -991 20 +-1178 -992 20 +-1177 -994 19 +-1176 -995 19 +-1175 -997 19 +-1174 -999 18 +-1173 -1001 18 +-1172 -1003 18 +-1171 -1005 18 +-1170 -1006 18 +-1169 -1007 19 +-1168 -1008 19 +-1167 -1009 19 +-1166 -1009 19 +-1165 -1009 18 +-1164 -1008 18 +-1163 -1007 17 +-1162 -1005 17 +-1161 -1003 16 +-1160 -1001 16 +-1159 -999 16 +-1158 -997 16 +-1157 -995 16 +-1156 -993 16 +-1155 -990 15 +-1154 -988 15 +-1153 -987 15 +-1152 -985 14 +-1151 -984 14 +-1150 -984 15 +-1149 -984 15 +-1148 -985 15 +-1147 -987 15 +-1146 -989 15 +-1145 -993 14 +-1144 -996 14 +-1143 -1000 14 +-1142 -1003 15 +-1141 -1006 15 +-1140 -1009 15 +-1139 -1011 15 +-1138 -1013 15 +-1137 -1014 15 +-1136 -1014 14 +-1135 -1014 14 +-1134 -1013 15 +-1133 -1012 15 +-1132 -1010 15 +-1131 -1008 15 +-1130 -1005 15 +-1129 -1002 15 +-1128 -999 15 +-1127 -996 15 +-1126 -992 15 +-1125 -989 15 +-1124 -985 15 +-1123 -982 15 +-1122 -979 16 +-1121 -976 16 +-1120 -973 15 +-1119 -971 15 +-1118 -969 15 +-1117 -967 15 +-1116 -965 15 +-1115 -963 15 +-1114 -962 15 +-1113 -960 15 +-1112 -959 15 +-1111 -957 15 +-1110 -956 15 +-1109 -954 15 +-1108 -953 15 +-1107 -951 16 +-1106 -950 16 +-1105 -949 16 +-1104 -948 17 +-1103 -948 17 +-1102 -947 18 +-1101 -947 18 +-1100 -947 18 +-1099 -948 18 +-1098 -948 17 +-1097 -949 17 +-1096 -950 17 +-1095 -950 16 +-1094 -951 16 +-1093 -952 16 +-1092 -953 17 +-1091 -953 17 +-1090 -954 17 +-1089 -955 18 +-1088 -955 18 +-1087 -955 18 +-1086 -955 18 +-1085 -954 18 +-1084 -954 18 +-1083 -953 18 +-1082 -953 17 +-1081 -952 17 +-1080 -951 17 +-1079 -950 17 +-1078 -949 17 +-1077 -949 17 +-1076 -948 18 +-1075 -947 18 +-1074 -947 18 +-1073 -947 18 +-1072 -947 18 +-1071 -947 18 +-1070 -948 18 +-1069 -949 17 +-1068 -950 17 +-1067 -951 17 +-1066 -952 17 +-1065 -953 17 +-1064 -954 17 +-1063 -954 17 +-1062 -955 17 +-1061 -956 17 +-1060 -956 18 +-1059 -956 18 +-1058 -955 18 +-1057 -954 18 +-1056 -953 18 +-1055 -952 18 +-1054 -950 18 +-1053 -947 17 +-1052 -945 17 +-1051 -942 16 +-1050 -939 16 +-1049 -936 16 +-1048 -933 16 +-1047 -930 16 +-1046 -927 16 +-1045 -925 17 +-1044 -922 17 +-1043 -920 17 +-1042 -918 18 +-1041 -917 18 +-1040 -916 18 +-1039 -915 18 +-1038 -915 18 +-1037 -915 18 +-1036 -916 18 +-1035 -917 17 +-1034 -918 17 +-1033 -919 17 +-1032 -920 16 +-1031 -922 16 +-1030 -923 16 +-1029 -924 16 +-1028 -926 17 +-1027 -927 17 +-1026 -928 17 +-1025 -929 17 +-1024 -930 18 +-1023 -930 18 +-1022 -930 18 +-1021 -929 18 +-1020 -928 18 +-1019 -927 18 +-1018 -925 17 +-1017 -923 17 +-1016 -921 16 +-1015 -918 16 +-1014 -915 16 +-1013 -912 15 +-1012 -909 15 +-1011 -906 15 +-1010 -902 15 +-1009 -899 16 +-1008 -896 16 +-1007 -893 16 +-1006 -890 17 +-1005 -887 17 +-1004 -885 17 +-1003 -883 17 +-1002 -881 17 +-1001 -880 17 +-1000 -879 17 +-999 -878 17 +-998 -878 16 +-997 -877 16 +-996 -877 16 +-995 -877 16 +-994 -877 17 +-993 -877 17 +-992 -877 17 +-991 -877 18 +-990 -877 18 +-989 -877 18 +-988 -876 18 +-987 -876 18 +-986 -875 18 +-985 -875 17 +-984 -874 17 +-983 -873 16 +-982 -872 16 +-981 -871 16 +-980 -869 16 +-979 -868 16 +-978 -867 16 +-977 -866 16 +-976 -864 17 +-975 -863 17 +-974 -862 17 +-973 -860 17 +-972 -859 17 +-971 -858 17 +-970 -857 17 +-969 -856 16 +-968 -855 16 +-967 -855 16 +-966 -854 16 +-965 -853 16 +-964 -853 16 +-963 -853 17 +-962 -853 17 +-961 -853 17 +-960 -853 18 +-959 -854 18 +-958 -854 18 +-957 -855 18 +-956 -856 18 +-955 -857 18 +-954 -859 17 +-953 -860 17 +-952 -862 16 +-951 -863 16 +-950 -865 16 +-949 -866 16 +-948 -867 16 +-947 -869 16 +-946 -870 16 +-945 -871 16 +-944 -872 17 +-943 -872 17 +-942 -872 17 +-941 -872 17 +-940 -872 18 \ No newline at end of file diff --git a/test/golden/expected_data/density_file.tsv b/test/golden/expected_data/density_file.tsv new file mode 100644 index 0000000..fe33569 --- /dev/null +++ b/test/golden/expected_data/density_file.tsv @@ -0,0 +1,399 @@ +id yearBCAD density +1 -1414 1.2485408e-5 +1 -1413 1.5097789e-5 +1 -1412 1.9989839e-5 +1 -1411 2.4033061e-5 +1 -1410 3.154145e-5 +1 -1409 4.366613e-5 +1 -1408 5.182399e-5 +1 -1407 6.135777e-5 +1 -1406 7.2468654e-5 +1 -1405 8.5380896e-5 +1 -1404 9.258939e-5 +1 -1403 1.08679174e-4 +1 -1402 1.0427339e-4 +1 -1401 1.130317e-4 +1 -1400 1.2244577e-4 +1 -1399 1.1830085e-4 +1 -1398 1.2825013e-4 +1 -1397 1.389422e-4 +1 -1396 1.504234e-4 +1 -1395 1.8086852e-4 +1 -1394 1.9515397e-4 +1 -1393 2.2673856e-4 +1 -1392 2.6929172e-4 +1 -1391 2.8920497e-4 +1 -1390 3.3288536e-4 +1 -1389 3.56778e-4 +1 -1388 3.7510364e-4 +1 -1387 4.3041562e-4 +1 -1386 4.9248507e-4 +1 -1385 5.6189613e-4 +1 -1384 6.392443e-4 +1 -1383 7.2512927e-4 +1 -1382 8.201471e-4 +1 -1381 9.840226e-4 +1 -1380 1.1662486e-3 +1 -1379 1.3735098e-3 +1 -1378 1.5234945e-3 +1 -1377 1.6849806e-3 +1 -1376 1.8581729e-3 +1 -1375 2.0431816e-3 +1 -1374 2.2400115e-3 +1 -1373 2.448548e-3 +1 -1372 2.5780778e-3 +1 -1371 2.8082137e-3 +1 -1370 2.927483e-3 +1 -1369 3.084582e-3 +1 -1368 3.2117483e-3 +1 -1367 3.4306804e-3 +1 -1366 3.5625e-3 +1 -1365 3.6964875e-3 +1 -1364 3.921976e-3 +1 -1363 3.921976e-3 +1 -1362 3.921976e-3 +1 -1361 3.921976e-3 +1 -1360 3.921976e-3 +1 -1359 3.786735e-3 +1 -1358 3.5625e-3 +1 -1357 3.4306804e-3 +1 -1356 3.2117483e-3 +1 -1355 3.084582e-3 +1 -1354 2.7521139e-3 +1 -1353 2.6336822e-3 +1 -1352 2.4060064e-3 +1 -1351 2.1909003e-3 +1 -1350 2.0881426e-3 +1 -1349 1.8922987e-3 +1 -1348 1.7992173e-3 +1 -1347 1.6226898e-3 +1 -1346 1.5392012e-3 +1 -1345 1.3816209e-3 +1 -1344 1.3074493e-3 +1 -1343 1.1681007e-3 +1 -1342 1.1028139e-3 +1 -1341 9.807099e-4 +1 -1340 8.1771566e-4 +1 -1339 7.2165084e-4 +1 -1338 5.9492886e-4 +1 -1337 4.2088976e-4 +1 -1336 3.398045e-4 +1 -1335 2.7252332e-4 +1 -1334 2.3873866e-4 +1 -1333 1.9009615e-4 +1 -1332 1.6274204e-4 +1 -1331 1.3520932e-4 +1 -1330 1.2463809e-4 +1 -1329 1.1481369e-4 +1 -1328 1.1481369e-4 +1 -1327 1.2463809e-4 +1 -1326 1.504234e-4 +1 -1325 1.6274204e-4 +1 -1324 1.9009615e-4 +1 -1323 2.2143334e-4 +1 -1322 2.3438824e-4 +1 -1321 2.9353763e-4 +1 -1320 3.398045e-4 +1 -1319 3.9220374e-4 +1 -1318 4.51336e-4 +1 -1317 4.836211e-4 +1 -1316 5.178245e-4 +1 -1315 5.540268e-4 +1 -1314 5.9230917e-4 +1 -1313 6.327529e-4 +1 -1312 6.754396e-4 +1 -1311 7.204508e-4 +1 -1310 7.6786714e-4 +1 -1309 7.6786714e-4 +1 -1308 8.70235e-4 +1 -1307 9.2534255e-4 +1 -1306 9.831673e-4 +1 -1305 1.1072573e-3 +1 -1304 1.243054e-3 +1 -1303 1.3910474e-3 +1 -1302 1.5516536e-3 +1 -1301 1.8168977e-3 +1 -1300 2.010228e-3 +1 -1299 2.2168152e-3 +1 -1298 2.4365452e-3 +1 -1297 2.551259e-3 +1 -1296 2.7521139e-3 +1 -1295 2.8735236e-3 +1 -1294 2.9141635e-3 +1 -1293 3.0411426e-3 +1 -1292 3.0411426e-3 +1 -1291 3.170975e-3 +1 -1290 3.170975e-3 +1 -1289 3.303553e-3 +1 -1288 3.303553e-3 +1 -1287 3.358479e-3 +1 -1286 3.4965212e-3 +1 -1285 3.4965212e-3 +1 -1284 3.6370563e-3 +1 -1283 3.7799273e-3 +1 -1282 4.071976e-3 +1 -1281 4.296666e-3 +1 -1280 4.4457754e-3 +1 -1279 4.5960867e-3 +1 -1278 4.747369e-3 +1 -1277 4.8993793e-3 +1 -1276 5.0518652e-3 +1 -1275 5.0518652e-3 +1 -1274 5.0518652e-3 +1 -1273 5.204564e-3 +1 -1272 5.204564e-3 +1 -1271 5.204564e-3 +1 -1270 5.3572045e-3 +1 -1269 5.3572045e-3 +1 -1268 5.509506e-3 +1 -1267 5.661181e-3 +1 -1266 5.9614684e-3 +1 -1265 6.255645e-3 +1 -1264 6.4968583e-3 +1 -1263 6.815701e-3 +1 -1262 7.0765605e-3 +1 -1261 7.436963e-3 +1 -1260 7.7533075e-3 +1 -1259 8.018399e-3 +1 -1258 8.219791e-3 +1 -1257 8.368772e-3 +1 -1256 8.430688e-3 +1 -1255 8.46572e-3 +1 -1254 8.450533e-3 +1 -1253 8.431589e-3 +1 -1252 8.40514e-3 +1 -1251 8.371258e-3 +1 -1250 8.40514e-3 +1 -1249 8.431589e-3 +1 -1248 8.450533e-3 +1 -1247 8.4619215e-3 +1 -1246 8.4619215e-3 +1 -1245 8.450533e-3 +1 -1244 8.40514e-3 +1 -1243 8.330035e-3 +1 -1242 8.281585e-3 +1 -1241 8.1635425e-3 +1 -1240 8.094268e-3 +1 -1239 8.018399e-3 +1 -1238 8.018399e-3 +1 -1237 8.018399e-3 +1 -1236 8.018399e-3 +1 -1235 8.018399e-3 +1 -1234 8.094268e-3 +1 -1233 8.1635425e-3 +1 -1232 8.226037e-3 +1 -1231 8.281585e-3 +1 -1230 8.330035e-3 +1 -1229 8.371258e-3 +1 -1228 8.40514e-3 +1 -1227 8.431589e-3 +1 -1226 8.4619215e-3 +1 -1225 8.46572e-3 +1 -1224 8.4619215e-3 +1 -1223 8.450533e-3 +1 -1222 8.40514e-3 +1 -1221 8.330035e-3 +1 -1220 8.226037e-3 +1 -1219 8.094268e-3 +1 -1218 7.847695e-3 +1 -1217 7.547679e-3 +1 -1216 7.321346e-3 +1 -1215 6.947988e-3 +1 -1214 6.58651e-3 +1 -1213 6.3064555e-3 +1 -1212 5.930375e-3 +1 -1211 5.637854e-3 +1 -1210 5.558337e-3 +1 -1209 5.2662515e-3 +1 -1208 5.119849e-3 +1 -1207 4.9735317e-3 +1 -1206 4.9735317e-3 +1 -1205 4.8275352e-3 +1 -1204 4.907322e-3 +1 -1203 4.7636973e-3 +1 -1202 4.6206973e-3 +1 -1201 4.704902e-3 +1 -1200 4.5643696e-3 +1 -1199 4.424721e-3 +1 -1198 4.1488023e-3 +1 -1197 4.012877e-3 +1 -1196 3.653369e-3 +1 -1195 3.5220152e-3 +1 -1194 3.1740458e-3 +1 -1193 2.927483e-3 +1 -1192 2.8082137e-3 +1 -1191 2.4904548e-3 +1 -1190 2.3804714e-3 +1 -1189 2.254728e-3 +1 -1188 2.1529314e-3 +1 -1187 2.054169e-3 +1 -1186 2.0431816e-3 +1 -1185 1.9491977e-3 +1 -1184 1.943383e-3 +1 -1183 1.943383e-3 +1 -1182 1.943383e-3 +1 -1181 2.0357415e-3 +1 -1180 2.0357415e-3 +1 -1179 2.2290554e-3 +1 -1178 2.3299854e-3 +1 -1177 2.448548e-3 +1 -1176 2.5571366e-3 +1 -1175 2.7827376e-3 +1 -1174 2.927483e-3 +1 -1173 3.1740458e-3 +1 -1172 3.4306804e-3 +1 -1171 3.6964875e-3 +1 -1170 3.8325028e-3 +1 -1169 4.058939e-3 +1 -1168 4.197468e-3 +1 -1167 4.3373904e-3 +1 -1166 4.3373904e-3 +1 -1165 4.2511714e-3 +1 -1164 4.1100103e-3 +1 -1163 3.8835478e-3 +1 -1162 3.6081513e-3 +1 -1161 3.2547812e-3 +1 -1160 2.9978324e-3 +1 -1159 2.7521139e-3 +1 -1158 2.5182941e-3 +1 -1157 2.2968634e-3 +1 -1156 2.0881426e-3 +1 -1155 1.725199e-3 +1 -1154 1.5516536e-3 +1 -1153 1.4697512e-3 +1 -1152 1.253416e-3 +1 -1151 1.1826834e-3 +1 -1150 1.243054e-3 +1 -1149 1.243054e-3 +1 -1148 1.3154981e-3 +1 -1147 1.4697512e-3 +1 -1146 1.6367924e-3 +1 -1145 1.9362542e-3 +1 -1144 2.2478928e-3 +1 -1143 2.7102758e-3 +1 -1142 3.170975e-3 +1 -1141 3.5764445e-3 +1 -1140 4.002923e-3 +1 -1139 4.296666e-3 +1 -1138 4.5960867e-3 +1 -1137 4.747369e-3 +1 -1136 4.6756812e-3 +1 -1135 4.6756812e-3 +1 -1134 4.5960867e-3 +1 -1133 4.4457754e-3 +1 -1132 4.1489787e-3 +1 -1131 3.8586948e-3 +1 -1130 3.438754e-3 +1 -1129 3.0411426e-3 +1 -1128 2.6691433e-3 +1 -1127 2.3250505e-3 +1 -1126 1.9119045e-3 +1 -1125 1.6367924e-3 +1 -1124 1.3154981e-3 +1 -1123 1.1072573e-3 +1 -1122 9.807099e-4 +1 -1121 8.1771566e-4 +1 -1120 6.327529e-4 +1 -1119 5.540268e-4 +1 -1118 4.836211e-4 +1 -1117 4.2088976e-4 +1 -1116 3.6520066e-4 +1 -1115 3.1594106e-4 +1 -1114 2.9353763e-4 +1 -1113 2.5282917e-4 +1 -1112 2.3438824e-4 +1 -1111 2.0100856e-4 +1 -1110 1.8594635e-4 +1 -1109 1.5878474e-4 +1 -1108 1.4657498e-4 +1 -1107 1.389422e-4 +1 -1106 1.2825013e-4 +1 -1105 1.1830085e-4 +1 -1104 1.2244577e-4 +1 -1103 1.2244577e-4 +1 -1102 1.2724288e-4 +1 -1101 1.2724288e-4 +1 -1100 1.2724288e-4 +1 -1099 1.3755045e-4 +1 -1098 1.2244577e-4 +1 -1097 1.32557e-4 +1 -1096 1.4340869e-4 +1 -1095 1.2825013e-4 +1 -1094 1.389422e-4 +1 -1093 1.504234e-4 +1 -1092 1.8086852e-4 +1 -1091 1.8086852e-4 +1 -1090 1.9515397e-4 +1 -1089 2.330168e-4 +1 -1088 2.330168e-4 +1 -1087 2.330168e-4 +1 -1086 2.330168e-4 +1 -1085 2.1653922e-4 +1 -1084 2.1653922e-4 +1 -1083 2.0109423e-4 +1 -1082 1.8086852e-4 +1 -1081 1.6751625e-4 +1 -1080 1.5504599e-4 +1 -1079 1.4340869e-4 +1 -1078 1.32557e-4 +1 -1077 1.32557e-4 +1 -1076 1.3755045e-4 +1 -1075 1.2724288e-4 +1 -1074 1.2724288e-4 +1 -1073 1.2724288e-4 +1 -1072 1.2724288e-4 +1 -1071 1.2724288e-4 +1 -1070 1.3755045e-4 +1 -1069 1.32557e-4 +1 -1068 1.4340869e-4 +1 -1067 1.5504599e-4 +1 -1066 1.6751625e-4 +1 -1065 1.8086852e-4 +1 -1064 1.9515397e-4 +1 -1063 1.9515397e-4 +1 -1062 2.1042564e-4 +1 -1061 2.2673856e-4 +1 -1060 2.5058194e-4 +1 -1059 2.5058194e-4 +1 -1058 2.330168e-4 +1 -1057 2.1653922e-4 +1 -1056 2.0109423e-4 +1 -1055 1.8662821e-4 +1 -1054 1.6042878e-4 +1 -1053 1.130317e-4 +1 -1052 9.6131356e-5 +1 -1051 6.597864e-5 +1 -1050 5.0871124e-5 +1 -1049 3.8998478e-5 +1 -1048 2.9728637e-5 +1 -1047 2.2536955e-5 +1 -1046 1.6992273e-5 +1 -1045 1.6588087e-5 +1 -1044 1.2485408e-5 +1 -1043 1.0301709e-5 +1 -1042 1.0203481e-5 +1 -1041 9.27303e-6 +1 -1040 8.422884e-6 +1 -1039 7.6465785e-6 +1 -1038 7.6465785e-6 +1 -1037 7.6465785e-6 +1 -1036 8.422884e-6 +1 -1035 7.68871e-6 +1 -1034 8.480968e-6 +1 -1033 9.349705e-6 +1 -1032 8.6125665e-6 +1 -1031 1.04884075e-5 +1 -1030 1.1564427e-5 +1 -1029 1.2743456e-5 +1 -1028 1.8214989e-5 +1 -1027 1.9989839e-5 +1 -1026 2.192485e-5 +1 -1025 2.4033061e-5 +1 -1024 3.0780615e-5 +1 -1023 3.0780615e-5 +1 -1022 3.0780615e-5 +1 -1021 2.8162687e-5 +1 -1020 2.5752486e-5 +1 -1019 2.353499e-5 +1 -1018 1.6588087e-5 +1 -1017 1.3733515e-5 diff --git a/test/golden/expected_data/hdr_file.tsv b/test/golden/expected_data/hdr_file.tsv new file mode 100644 index 0000000..933554e --- /dev/null +++ b/test/golden/expected_data/hdr_file.tsv @@ -0,0 +1,7 @@ +id hdrSigmaLevel hdrStartYearBCAD hdrStopYearBCAD +1 1 -1364 -1360 +1 1 -1282 -1197 +1 1 -1169 -1163 +1 1 -1140 -1131 +1 2 -1379 -1344 +1 2 -1304 -1124 \ No newline at end of file diff --git a/test/golden/expected_data/samples_file.tsv b/test/golden/expected_data/samples_file.tsv new file mode 100644 index 0000000..6b7ece2 --- /dev/null +++ b/test/golden/expected_data/samples_file.tsv @@ -0,0 +1,4 @@ +id yearBCAD +1 -1262 +1 -1362 +1 -1235 diff --git a/test/golden/expected_data/single_radiocarbon_date.out b/test/golden/expected_data/single_radiocarbon_date.out new file mode 100644 index 0000000..0409fa8 --- /dev/null +++ b/test/golden/expected_data/single_radiocarbon_date.out @@ -0,0 +1,20 @@ +currycarbon v0.3.0.0 (UTF-8) +Method: Bchron {distribution = StudentTDist {ndf = 100.0}} +Curve: IntCal20 +Calibrating... +Done. +CalEXPR: [1] 1:3000±30BP +Calibrated: 1379BC >> 1364BC > 1238BC < 1131BC << 1124BC +1-sigma: 1364-1360BC, 1282-1197BC, 1169-1163BC, 1140-1131BC +2-sigma: 1379-1344BC, 1304-1124BC + ▁▁▁▁▁▁ + ▁▒▒▒▒▒▒▁ + ▁▒▒▒▒▒▒▒▒▁ + ▁ ▁▁▒▒▒▒▒▒▒▒▒▒▁ ▁ ▁▁ + ▁▒▁ ▁▁▒▒▒▒▒▒▒▒▒▒▒▒▒▁▁▁▁▒▁ ▒▒ + ▁▁▒▒▒▁▁ ▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁▁▒▒▁ + ▁▁▁▁▒▒▒▒▒▒▒▁▁▁▁▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ + -1410 ┄──┬─────────────┬─────────────┬─────────────┬────────────┄ -1020 + > > ^ << + ─ ────────────── ─ ── + ────── ─────────────────────────── diff --git a/test/golden/hdr_file.sh b/test/golden/hdr_file.sh new file mode 100644 index 0000000..4ef380b --- /dev/null +++ b/test/golden/hdr_file.sh @@ -0,0 +1 @@ +currycarbon "3000,30" --hdrFile actual_data/hdr_file.tsv \ No newline at end of file diff --git a/test/golden/samples_file.sh b/test/golden/samples_file.sh new file mode 100644 index 0000000..4e138d0 --- /dev/null +++ b/test/golden/samples_file.sh @@ -0,0 +1 @@ +currycarbon "3000,30" --samplesFile actual_data/samples_file.tsv -n 3 --seed 123 \ No newline at end of file diff --git a/test/golden/single_radiocarbon_date.sh b/test/golden/single_radiocarbon_date.sh new file mode 100644 index 0000000..4e7abbf --- /dev/null +++ b/test/golden/single_radiocarbon_date.sh @@ -0,0 +1 @@ +currycarbon "3000,30" \ No newline at end of file