diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..95de9dc --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "rust-analyzer.linkedProjects": [ + "./cli/Cargo.toml", + "./pta-ledger/Cargo.toml", + "./pta-parser/Cargo.toml", + "./pta-types/Cargo.toml" + ] +} \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7addbb2 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,19 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + +[workspace] +resolver = "2" +members = [ + "cli" + ,"pta-ledger" + ,"pta-parser" + ,'pta-types', +] + +# Default values for workspace projects +[workspace.package] +edition = "2021" +version = "0.2.0" +authors = ["AltaModa Technologies"] +# repository = "https://github.com/altamodatech/pta-parser" diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 0000000..e81de08 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,19 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + +[package] +name = "cli" +version.workspace = true +authors.workspace = true +# repository.workspace = true +edition.workspace = true + + +[dependencies] +log = "0.4.21" +pest = "2.7.3" +pretty_env_logger = "0.5.0" +pta-ledger = { path = "../pta-ledger" } +pta-parser = { path = "../pta-parser" } +pta-types = { path = "../pta-types" } diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 0000000..18e8c28 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,67 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). 
+// + +extern crate pta_ledger; +extern crate pta_parser; + + +use log::{info, warn, error}; + +// TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) +use pta_ledger::ledger_builder::LedgerBuilder; + + + +fn main() -> Result<(), Box<dyn std::error::Error>> { + // TODO: CLI improvements + // - exec with path of file to parse + // - optionally output parse results (should be equivalent to input file) + + // TODO: consider flag to use init_timed to include time per line + pretty_env_logger::init(); + + let pb = std::env::current_dir()?; + let p = pb.join("testdata/basic-ledger"); + + info!("Input file: {:?}", p); + + let mut bldr = LedgerBuilder::default(); + match std::fs::read_to_string(p) { + Ok(ledger) => { + info!("String length from input: {}", ledger.len()); + match bldr.from_string(&ledger) { + Ok(_parsed) => { + info!("Successfully parsed into ParsedLedger"); + return Ok(()); + }, + + Err(e) => { + error!("LedgerBuilder failed with {:}", e); + return Err(e); + } + } + } + + Err(e) => { + warn!("failed to read file as string; {e}"); + return Err(Box::new(e)); + } + } + +} + + + +#[cfg(test)] +mod cli_tests { + + use pta_parser::parsers::generic::Parser; + + #[test] + fn can_create_parser() { + // simply verifies that the parser can be instantiated, ensuring accessibility + let _ = Parser{}; + } +} \ No newline at end of file diff --git a/journal.md b/journal.md new file mode 100644 index 0000000..2f350e7 --- /dev/null +++ b/journal.md @@ -0,0 +1,7 @@ +# Plain-Text Accounting Parser + +## History + +### 10/18/2023 + +Abandoned effort to integrate [pest_consume](https://lib.rs/crates/pest_consume) since the author is no longer maintaining it. diff --git a/pta-ledger/Cargo.toml b/pta-ledger/Cargo.toml new file mode 100644 index 0000000..ec2aa88 --- /dev/null +++ b/pta-ledger/Cargo.toml @@ -0,0 +1,24 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. 
LICENSE file in root). + +[package] +name = "pta-ledger" +version.workspace = true +authors.workspace = true +# repository.workspace = true +edition.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +bench = false + +[dependencies] +log = { version = "0.4.20", features = ["kv_unstable", "kv_unstable_serde"] } +pest = "2.7.3" +pest_derive = "2.7.3" +pta-parser = { path = "../pta-parser" } +pta-types ={ path = "../pta-types" } + +[dev-dependencies] +rstest = "0.19.0" diff --git a/pta-ledger/src/ledger_builder.rs b/pta-ledger/src/ledger_builder.rs new file mode 100644 index 0000000..fe0125b --- /dev/null +++ b/pta-ledger/src/ledger_builder.rs @@ -0,0 +1,250 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + +use log::{info, warn}; + +use pta_types::*; + + + +// TODO: how to isolate pest so clients can just use lib (w/o requiring pest as here) +use pest::{*, iterators::Pair}; +use pta_parser::parsers::generic; + +#[derive(Default)] +pub struct LedgerBuilder { + pl: ParsedLedger +} + +impl LedgerBuilder { + pub fn from_string(self: &mut Self, ledger: &String) -> Result<&mut ParsedLedger, Box<dyn std::error::Error>> { + + self.pl = ParsedLedger::default(); + + match generic::Parser::parse(generic::Rule::generic_ledger, &ledger) { + Ok(root) => { + info!("Successfully parsed with generic::Rule::generic_ledger"); + for pair in root.into_iter() { + info!("LedgerBuilder::from_string: root pair is {:}", pair.as_str()); + self.handle_pair(pair)?; + } + } + + Err(err) => { + warn!("failed to parse with generic::Rule::generic_ledger. 
err: {err}"); + return Err(Box::new(err)); + } + } + + return Ok(&mut self.pl); + } + + + fn handle_pair(self: &Self, pair: Pair<'_, generic::Rule>) -> Result<(), Box<dyn std::error::Error>> { + + match pair.as_rule() { + generic::Rule::comment => { + info!("generic::Rule::comment: {:?}", pair.as_span().as_str()); + } + generic::Rule::EOI => { + info!("generic::Rule::EOI at {:?}", pair.line_col()); + } + + generic::Rule::WHITESPACE => {} + generic::Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } + generic::Rule::acct_separator => { dump_pair(&pair); return Ok(()); } + generic::Rule::balance_directive => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_token => { dump_pair(&pair); return Ok(()); } + generic::Rule::currency => { dump_pair(&pair); return Ok(()); } + generic::Rule::decimal_value => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_close => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_open => { dump_pair(&pair); return Ok(()); } + generic::Rule::directives => { dump_pair(&pair); return Ok(()); } + generic::Rule::empty_line => {} + generic::Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } + generic::Rule::generic_ledger => { + return handle_ledger_rule(&pair); + } + generic::Rule::options => { dump_pair(&pair); return Ok(()); } + generic::Rule::posting_basic => { + dump_pair(&pair); return Ok(()); + } + generic::Rule::posting_indent => { dump_pair(&pair); return Ok(()); } + generic::Rule::sub_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } + 
generic::Rule::trans_header => { + let mut xn = raw_transaction::RawTransaction::default(); + return handle_trans_header(&mut xn, &pair); + } + generic::Rule::transaction_block => { + let mut xn = raw_transaction::RawTransaction::default(); + return handle_trans_block(&mut xn, &pair); + } + } + + return Ok(()); + + } + +} + + +fn dump_rule_of_pair(p: &Pair<generic::Rule>) { + info!("RULE: {:?} at {:?}; SPAN: {:?}", &p.as_rule(), &p.line_col(), &p.as_span()); +} + +// REMOVE: +#[allow(dead_code)] +fn dump_rule(r:&generic::Rule, s:&Span) { + info!("RULE: {:?}; SPAN: {:?}", &r, &s); +} + +fn dump_pair(p:&Pair<generic::Rule>) { + dump_rule_of_pair(p); +} + + + +fn handle_ledger_rule(pair: & Pair<generic::Rule>) -> Result<(), Box<dyn std::error::Error>> { + for inner_pair in pair.clone().into_inner() { + + match handle_pair(inner_pair) { + Ok(_p) => { /* handle_pair does all the work */ } + + Err(err) => { + warn!("handle_pair failed in handle_ledger. err: {err}"); + return Err(err); + } + }; + } + + return Ok(()); +} + +#[allow(dead_code)] // TODO: REMOVE allow dead code +fn handle_posting_basic(_xn: &mut raw_transaction::RawTransaction, pair: &Pair<generic::Rule>) -> Result<(), Box<dyn std::error::Error>> { + + match generic::Parser::parse(generic::Rule::posting_basic, pair.as_span().as_str()) { + Ok(_posting) => { + info!("handling posting_basic"); + // handle_posting_basic(xn, posting); TODO: fix + } + + Err(e) => { + warn!("failed to parse with posting_basic. 
err: {e}"); + return Err(Box::new(e)); + } + + } + + return Ok(()); +} + +fn handle_trans_header(_: &mut raw_transaction::RawTransaction, _: &Pair<generic::Rule>) -> Result<(), Box<dyn std::error::Error>> { + info!("handling trans_header..."); + + return Ok(()); +} + +fn handle_trans_block(xn: &mut raw_transaction::RawTransaction, pair: &Pair<generic::Rule>) -> Result<(), Box<dyn std::error::Error>> { + info!("handling trans_block..."); + + xn.pinfo = ParserInfo { + position: FilePosition { + line: pair.line_col().0, + col: pair.line_col().1 + } + }; + + info!("parse with trans_header"); + match generic::Parser::parse(generic::Rule::trans_header, &pair.as_span().as_str()) { + Ok(hdr) => { + for pair in hdr.into_iter() { + info!("attempt handle_trans_header on {}", pair.as_span().as_str()); + match handle_trans_header(xn, &pair) { + Ok(()) => { + // TODO: REVIEW: should anything happen here? + } + + Err(e) => { + warn!("handle_trans_header failed. err: {e}"); + return Err(e); + } + } + + } + // for p in &pair.into_inner() { + // handle_posting_basic(&mut xn, &p); + // } + } + + Err(e) => { + warn!("failed to parse with trans_header. 
err: {e}"); + return Err(Box::new(e)); + } + } + + return Ok(()); + +} + + + + +fn handle_pair(pair: Pair<'_, generic::Rule>) -> Result<(), Box<dyn std::error::Error>> { + match pair.as_rule() { + generic::Rule::comment => { + info!("generic::Rule::comment: {:?}", pair.as_span().as_str()); + } + generic::Rule::EOI => { + info!("generic::Rule::EOI at {:?}", pair.line_col()); + } + + generic::Rule::WHITESPACE => {} + generic::Rule::acct_descriptor => { dump_pair(&pair); return Ok(()); } + generic::Rule::acct_separator => { dump_pair(&pair); return Ok(()); } + generic::Rule::balance_directive => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_or_newline => { dump_pair(&pair); return Ok(()); } + generic::Rule::comment_token => { dump_pair(&pair); return Ok(()); } + generic::Rule::currency => { dump_pair(&pair); return Ok(()); } + generic::Rule::decimal_value => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_close => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_commodity => { dump_pair(&pair); return Ok(()); } + generic::Rule::directive_open => { dump_pair(&pair); return Ok(()); } + generic::Rule::directives => { dump_pair(&pair); return Ok(()); } + generic::Rule::empty_line => {} + generic::Rule::iso8601_date_extended => { dump_pair(&pair); return Ok(()); } + generic::Rule::generic_ledger => { + return handle_ledger_rule(&pair); + } + generic::Rule::options => { dump_pair(&pair); return Ok(()); } + generic::Rule::posting_basic => { dump_pair(&pair); return Ok(()); } + generic::Rule::posting_indent => { dump_pair(&pair); return Ok(()); } + generic::Rule::sub_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::top_level_acct => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_annotation => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_description_text => { dump_pair(&pair); return Ok(()); } + generic::Rule::trans_header => { + let mut 
xn = raw_transaction::RawTransaction::default(); + return handle_trans_header(&mut xn, &pair); + } + generic::Rule::transaction_block => { + let mut xn = raw_transaction::RawTransaction::default(); + return handle_trans_block(&mut xn, &pair); + } + } + + return Ok(()); + +} + + diff --git a/pta-ledger/src/lib.rs b/pta-ledger/src/lib.rs new file mode 100644 index 0000000..83b94e0 --- /dev/null +++ b/pta-ledger/src/lib.rs @@ -0,0 +1,13 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + +pub extern crate pest; +pub extern crate pest_derive; +pub extern crate pta_parser; +pub extern crate pta_types; + +pub mod ledger_builder; + +pub extern crate log; \ No newline at end of file diff --git a/pta-parser/Cargo.toml b/pta-parser/Cargo.toml new file mode 100644 index 0000000..4fba65e --- /dev/null +++ b/pta-parser/Cargo.toml @@ -0,0 +1,23 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + +[package] +name = "pta-parser" +version.workspace = true +authors.workspace = true +# repository.workspace = true +edition.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +bench = false + +[dependencies] +log = "0.4.20" +pest = "2.7.3" +pest_derive = "2.7.3" +pta-types = { path = "../pta-types" } + +[dev-dependencies] +rstest = "0.19.0" diff --git a/pta-parser/src/grammars/base.pest b/pta-parser/src/grammars/base.pest new file mode 100644 index 0000000..e2b2192 --- /dev/null +++ b/pta-parser/src/grammars/base.pest @@ -0,0 +1,44 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). 
+// + +WHITESPACE = _{ " " | "\t" } + +// constants +acct_separator = _{ ":" } +comment_token = _{ ";" | "*" } + +// TODO: need to handle escaped semi-colon? +// TODO: consider whether comment must be preceded by whitespace (except at beginning of line) +// a comment +comment = _{ comment_token ~ (!NEWLINE ~ ANY)* ~ NEWLINE } +comment_or_newline = _{ (WHITESPACE+ ~ comment) | (WHITESPACE* ~ NEWLINE) } +empty_line = _{ WHITESPACE* ~ NEWLINE } + + +// ISO8601 Date Extended format is YYYY-MM-DD where +// YYYY is 4 digits; 0000-9999 +// MM is 2 digits; 01-09, 10-12 +// DD is 2 digits; 01-09, 10-29, 30, 31 +iso8601_date_extended = @{ + ASCII_DIGIT{4} + ~ "-" ~ (( "0" ~ ASCII_NONZERO_DIGIT) | ("1" ~ '0'..'2')) + ~ "-" ~ (("30" | "31") | ("0" ~ ASCII_NONZERO_DIGIT) | ('1'..'2' ~ ASCII_DIGIT)) +} + + +currency = { ASCII_ALPHA_UPPER{3} } + + + +// +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII_DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// diff --git a/pta-parser/src/grammars/beancount.pest b/pta-parser/src/grammars/beancount.pest new file mode 100644 index 0000000..d7b4abc --- /dev/null +++ b/pta-parser/src/grammars/beancount.pest @@ -0,0 +1,114 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + +// Each acct token must begin with alpha and may be followed by any number of alpha or number +// Full account descriptors are comprised of colon-separated account names. The top-level +// account name must begin with an alpha char, but subaccounts may begin with alphanumeric. 
+// BEANCOUNT diffs: requires account names to begin with upper case alpha +top_level_acct = @{ ASCII_ALPHA_UPPER ~ ASCII_ALPHANUMERIC* } +sub_acct = @{ acct_separator ~ ASCII_ALPHA_UPPER ~ ASCII_ALPHANUMERIC+ } + +// The full acct descriptor must be one or more acct tokens, each separated by a colon +acct_descriptor = @{ top_level_acct ~ (sub_acct)* } + +decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } + + +// TODO: consider more lax indent rules +// Posting lines of a transaction must begin with 1 tab or 2 spaces +posting_indent = _{ "\t" | " "{2} } +// A basic posting must specify an account and a value, ending with a comment or newline +posting_basic = @{ + posting_indent + ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ comment_or_newline +} + +// TODO: improve on 'text' to allow more in description +trans_description_text = _{ (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } +// TODO: is this the full set of annotation options? +trans_annotation = _{ "txn" | "*" | "!" } +trans_description = _{ "\"" ~ trans_description_text ~ "\"" } + +// TODO: how to ensure col 0 / no ws for header row +// The header of a transaction specifies the date, an annotation, a description, and ends with a comment or newline +trans_header = @{ + iso8601_date_extended + ~ WHITESPACE+ + ~ trans_annotation + ~ WHITESPACE+ + ~ trans_description + ~ comment_or_newline +} + +// A transaction begins with a single header followed by one or more postings. Whether the transaction balances is +// outside the scope of parsing. +transaction_block = @{ trans_header ~ posting_basic+ } + + +options = { "operating_currency" } + +// TODO: open works but is incomplete +// YYYY-MM-DD open Account [ConstraintCurrency,...] 
["BookingMethod"] +directive_open = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "open" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD close Account +directive_close = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "close" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD commodity Currency +directive_commodity = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "commodity" + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} +// YYYY-MM-DD balance Account Amount +balance_directive = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "balance" + ~ WHITESPACE+ ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} + +// TODO: other directives to implement +// YYYY-MM-DD document Account PathToDocument +// YYYY-MM-DD event Name Value +// YYYY-MM-DD note Account Description +// YYYY-MM-DD pad Account AccountPad +// YYYY-MM-DD price Commodity Price +// include Filename +// option Name Value +// plugin ModuleName StringConfig +// poptag +// pushtag + +directives = { balance_directive | directive_close | directive_commodity | directive_open } + +// The rule for a complete ledger +beancount_ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } + + +// +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII_DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// diff --git a/pta-parser/src/grammars/generic.pest b/pta-parser/src/grammars/generic.pest new file mode 100644 index 0000000..40768ae --- /dev/null +++ b/pta-parser/src/grammars/generic.pest @@ -0,0 +1,113 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. 
LICENSE file in root). +// + + +// Each acct token must begin with alpha and may be followed by any number of alpha or number +// Full account descriptors are comprised of colon-separated account names. The top-level +// account name must begin with an alpha char, but subaccounts may begin with alphanumeric. +top_level_acct = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* } +sub_acct = @{ acct_separator ~ ASCII_ALPHANUMERIC+ } + +// The full acct descriptor must be one or more acct tokens, each separated by a colon +acct_descriptor = @{ top_level_acct ~ (sub_acct)* } + +decimal_value = @{ (("-" ~ NUMBER+) | NUMBER+) ~ "." ~ NUMBER+ } + + +// TODO: consider more lax indent rules +// Posting lines of a transaction must begin with 1 tab or 2 spaces +posting_indent = _{ "\t" | " "{2} } +// A basic posting must specify an account and a value, ending with a comment or newline +posting_basic = @{ + posting_indent + ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ comment_or_newline +} + +// TODO: improve on 'text' to allow more in description +trans_description_text = _{ (ASCII_ALPHANUMERIC+ | WHITESPACE)+ } +// TODO: is this the full set of annotation options? +trans_annotation = _{ "txn" | "*" | "!" } +trans_description = _{ "\"" ~ trans_description_text ~ "\"" } + +// TODO: how to ensure col 0 / no ws for header row +// The header of a transaction specifies the date, an annotation, a description, and ends with a comment or newline +trans_header = @{ + iso8601_date_extended + ~ WHITESPACE+ + ~ trans_annotation + ~ WHITESPACE+ + ~ trans_description + ~ comment_or_newline +} + +// A transaction begins with a single header followed by one or more postings. Whether the transaction balances is +// outside the scope of parsing. +transaction_block = @{ trans_header ~ posting_basic+ } + + +options = { "operating_currency" } + +// TODO: open works but is incomplete +// YYYY-MM-DD open Account [ConstraintCurrency,...] 
["BookingMethod"] +directive_open = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "open" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD close Account +directive_close = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "close" + ~ WHITESPACE+ ~ acct_descriptor + ~ comment_or_newline +} +// YYYY-MM-DD commodity Currency +directive_commodity = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "commodity" + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} +// YYYY-MM-DD balance Account Amount +balance_directive = @{ + iso8601_date_extended + ~ WHITESPACE+ ~ "balance" + ~ WHITESPACE+ ~ acct_descriptor + ~ WHITESPACE+ ~ decimal_value + ~ WHITESPACE+ ~ currency + ~ comment_or_newline +} + +// TODO: other directives to implement +// YYYY-MM-DD document Account PathToDocument +// YYYY-MM-DD event Name Value +// YYYY-MM-DD note Account Description +// YYYY-MM-DD pad Account AccountPad +// YYYY-MM-DD price Commodity Price +// include Filename +// option Name Value +// plugin ModuleName StringConfig +// poptag +// pushtag + +directives = { balance_directive | directive_close | directive_commodity | directive_open } + +// The rule for a complete ledger +generic_ledger = { SOI ~ (options | directives | transaction_block | comment | empty_line)+ ~ EOI } + + +// +// Pest's built-in rules: +// ASCII_ALPHA_LOWER = { 'a'..'z' } +// ASCII_ALPHA_UPPER = { 'A'..'Z' } +// ASCII_ALPHA = { ASCII_ALPHA_LOWER | ASCII_ALPHA_UPPER } +// ASCII_DIGIT = { '0'..'9' } +// ASCII_ALPHANUMERIC = { ASCII_ALPHA | ASCII_DIGIT } +// +// Avoid using WHITE_SPACE which targets [unicode](https://www.unicode.org/reports/tr31/#R3a) +// diff --git a/pta-parser/src/lib.rs b/pta-parser/src/lib.rs new file mode 100644 index 0000000..734f0e3 --- /dev/null +++ b/pta-parser/src/lib.rs @@ -0,0 +1,19 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). 
+// + +pub extern crate pest; +pub extern crate pest_derive; +#[cfg(test)] +pub extern crate rstest; + +// Export ledger parser +pub mod parsers; +// pub use parsers::generic::*; +// pub use parsers::beancount::*; +// pub use parsers::*; + + +pub mod parser_tests; +pub use parser_tests::*; \ No newline at end of file diff --git a/pta-parser/src/parser_tests/basics.rs b/pta-parser/src/parser_tests/basics.rs new file mode 100644 index 0000000..2ec2bfc --- /dev/null +++ b/pta-parser/src/parser_tests/basics.rs @@ -0,0 +1,173 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + +#[cfg(test)] use super::*; +#[cfg(test)] use rstest::rstest; + +// mod generic; + +#[cfg(test)] +mod acct_desc { + use super::*; + + #[rstest] + #[case ("a")] + #[case ("a1")] + #[case ("a:a")] + #[case ("a1:a")] + #[case ("a1:a1")] + #[case ("a:123")] // subaccts beginning w/number + #[case ("a1:sub:123")] + #[case ("asset")] + #[case ("asset:property")] + #[case ("asset:property:real")] + fn can_parse_acct_descriptor(#[case] acct_desc: &str) { + + let pairs = generic::Parser::parse( + generic::Rule::acct_descriptor, acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + // NOTE: invalid lead char in first acct segment ("1bad") fails top_level_acct rule & is included in verify_top_level_acct_parsing_error cases. 
+ #[case ("a1:b@d")] + #[case ("bad1:")] // invalid: ends with acct descriptor separator (colon) + #[should_panic(expected = "expected acct_descriptor")] + #[ignore = "unexpectedly parses without error"] + fn verify_acct_descriptor_parsing_error(#[case] bad_acct_desc: &str) { + + generic::Parser::parse( + generic::Rule::acct_descriptor, bad_acct_desc) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_acct_desc); + assert!(false); + } + + + + #[rstest] + #[case ("1")] // invalid: number as lead char + #[case ("1b")] + #[case ("1-b")] // invalid: non-alphanumeric char + #[case ("1b-")] + #[should_panic(expected = "expected top_level_acct")] + fn verify_top_level_acct_parsing_error(#[case] bad_top_level_acct: &str) { + + generic::Parser::parse( + generic::Rule::top_level_acct, bad_top_level_acct) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_top_level_acct); + assert!(false); + + } +} + + +#[cfg(test)] +mod decimal { + use super::*; + + #[rstest] + #[case ("0.00000001")] + #[case ("1.23")] + #[case ("123.456")] + #[case ("-123.456789012")] // negative values + #[case ("-0.00000001")] + fn can_parse_decimal_value(#[case] dec: &str) { + + let pairs = generic::Parser::parse( + generic::Rule::decimal_value, dec) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[cfg(test)] + #[rstest] + #[case ("0.")] // fractional segment missing + #[case ("-0.")] + #[case ("123")] + #[case ("-123")] + #[case (".12")] // whole segment missing + #[case ("-.12")] + + #[should_panic(expected = "expected decimal_value")] + fn verify_decimal_value_error(#[case] bad_dec: &str) { + + generic::Parser::parse( + generic::Rule::decimal_value, bad_dec) + 
.unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_dec); + assert!(false); + } + +} + + + +#[cfg(test)] +mod iso8601 { + use super::*; + + #[rstest] + #[case ("1900-01-01")] + #[case ("2015-12-31")] + fn can_parse_iso8601_date_extended(#[case] year: &str) { + + let pairs = generic::Parser::parse( + generic::Rule::iso8601_date_extended, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + + #[rstest] + #[case ("000-01-01")] // Year out of range + #[case ("99990-01-01")] + #[case ("01-01")] // year segment missing + + #[case ("1999")] // month segment missing + #[case ("1999-")] + #[case ("0000-00-01")] // Month out of range + #[case ("0000-13-01")] + + #[case ("1999-12")] // day segment missing + #[case ("1999-12-")] + #[case ("0000-01-00")] // Day out of range + #[case ("0000-01-32")] + + #[case ("000o-01-01")] // Invalid chars + #[case ("1999-0x-12")] + #[case ("1999-12-0x")] + + #[case ("1999 12-01")] // whitespace (ensure atomic rule modifier is used) + #[case ("1999-12 01")] + #[case (" 1999-12-01")] // leading space (reqs additional rule) + #[should_panic(expected = "expected iso8601_")] // matches errors from multiple iso8601 rules + fn verify_iso8601_date_extended_error(#[case] bad_date: &str) { + + generic::Parser::parse( + generic::Rule::iso8601_date_extended, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } +} diff --git a/pta-parser/src/parser_tests/mod.rs b/pta-parser/src/parser_tests/mod.rs new file mode 100644 index 0000000..4de4151 --- /dev/null +++ b/pta-parser/src/parser_tests/mod.rs @@ -0,0 +1,102 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. 
+// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + + +pub use super::*; +pub use pest::{Parser, iterators::Pairs}; +#[cfg(test)] +pub use rstest::rstest; + +use super::parsers::*; + +mod basics; +mod transaction; + + +#[cfg(test)] +mod directives { + use super::*; + + // YYYY-MM-DD open Account [ConstraintCurrency,...] ["BookingMethod"] + + #[rstest] + #[case (generic::Rule::directive_open, "2001-09-11 open assets")] + #[case (generic::Rule::directive_open, "2001-09-11 open assets:cash")] + #[case (generic::Rule::directive_open, "2001-09-11 open Assets1:cash2:3petty")] + #[case (generic::Rule::directive_close, "2001-09-11 close assets")] + #[case (generic::Rule::directive_close, "2001-09-11 close assets1:2cash:3petty")] + #[case (generic::Rule::directive_commodity, "2001-09-11 commodity USD")] + #[case (generic::Rule::balance_directive, "2001-09-11 balance assets 123.456 USD")] + #[case (generic::Rule::balance_directive, "2001-09-11 balance assets1:2cash -0.456 USD")] + fn can_parse_misc_directive(#[case] r: generic::Rule, #[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(r, &tc).len() > 0); + } + } + +} + + + +#[cfg(test)] +mod ledger_file { + use super::*; + + #[rstest] + #[case ( + "; an asterisk-based comment + * Accounts + 2001-09-11 open assets + 2001-09-11 open assets:cash\t;comment + 2001-09-12 close assets + + ;; Balance assertions + 2001-09-11 balance assets 123.456 USD + + + ;; Misc + 1792-01-01 commodity USD ; US Dollar + 2001-09-11 commodity BTC ; Bitcoin launch date + + ")] + fn can_parse_ledger(#[case] year: &str) { + + let pairs = generic::Parser::parse( + generic::Rule::generic_ledger, year) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing 
succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } +} + + + + + +pub fn get_pairs(r: generic::Rule, content: &str) -> Pairs<'_, generic::Rule> { + let x = generic::Parser::parse( + r, + + content) + .unwrap_or_else(|e| panic!("{}", e)); + + return x; +} + diff --git a/pta-parser/src/parser_tests/transaction.rs b/pta-parser/src/parser_tests/transaction.rs new file mode 100644 index 0000000..b5a06e1 --- /dev/null +++ b/pta-parser/src/parser_tests/transaction.rs @@ -0,0 +1,216 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + +#[cfg(test)] use super::*; +#[cfg(test)] use rstest::rstest; + + + +#[cfg(test)] +mod posting { + use super::*; + + #[rstest] + #[case (" Assets:subacct1 1.0000")] + #[case ("\tEquity \t -1.0000")] + fn can_parse_posting_basic(#[case] base: &str) { + + // NOTE: addons must end in \n to match rules + let addons = [ + "\n" + ," \n" + ,"\t\n" + ," ; comment 123 ; \n" + ,"\t;\tcomment 123 ;\t\n" + ]; + + for suffix in addons.iter() { + + let tc = format!("{}{}", base, suffix); + println!("Test case: {}", tc); + + assert!(get_pairs(generic::Rule::posting_basic, &tc).len() > 0); + } + } + + + #[rstest] + #[case (" Assets:subacct1 1.0000")] // Too many leading spaces or tabs + #[case (" \tEquity \t -1.0000")] + #[case ("\t Equity \t -1.0000")] + + #[should_panic(expected = "expected posting_basic")] // panic message must name the posting_basic rule + fn verify_posting_basic_error(#[case] bad_date: &str) { + + generic::Parser::parse( + generic::Rule::posting_basic, bad_date) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_date); + assert!(false); + } +} + + +#[cfg(test)] +mod trans_block { + use super::*; + + + // An example beancount transaction + // 2016-01-28 * " Buy BTC" ; 10:01 
am, xfer id 56aa57787199a73d29000650 + // Assets:Exchanges:Coinbase 1.03683606 BTC { 381.9697397 USD, 2016-01-28 } + // Assets:Bank:AllyChk -400.00 USD ; verified w/register + // Liabilities:Fees:Coinbase 3.96 USD + // Liabilities:Fees:Adjustment 0.00000005 USD + + #[rstest] + // #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" ;comment \n\tAssets 1.0000 ;posting comment\n\tEquity -1.0000 \n")] + #[case ("2009-01-09 ! \"Bitcoin launch date\"\n\tassets 1.0000\n equity -1.0000\n")] + fn can_parse_trans_block(#[case] tblock: &str) { + + let pairs = generic::Parser::parse( + generic::Rule::transaction_block, &tblock) + .unwrap_or_else(|e| panic!("{}", e)); + + // Parsing succeeded; ensure at least 1 pair was returned + assert!(pairs.len() > 0); + } + + #[rstest] + // #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\" + ")] + #[should_panic(expected = "expected transaction_block")] + fn verify_trans_block_posting_error(#[case] bad_block: &str) { + generic::Parser::parse( + generic::Rule::transaction_block, &bad_block) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_block); + assert!(false); + } + + // REVIEW: Are these cases duplicative of trans_header tests? + #[rstest] + #[ignore = "wip"] + #[case ("2009-01-09 ! \"Bitcoin launch date\"")] + #[should_panic(expected = "expected trans_header")] + fn verify_trans_block_trans_header_error(#[case] bad_block: &str) { + generic::Parser::parse( + generic::Rule::transaction_block, &bad_block) + .unwrap_or_else(|e| panic!("{}", e)); + + // should never reach this code since all cases should result in panic + println!("Test case '{}' should fail to parse!", bad_block); + assert!(false); + } + +} + + +//=========== +// NOTE: The tests in trans_header can be used by removing the silent indicator ('_') from the relevant pest rules. 
+// These rules were silenced to simplify processing in code (matching, etc.), but can be un-silenced for debugging, etc. +//=========== + +// #[cfg(test)] +// mod trans_header { +// use super::*; + + +// #[rstest] +// // NOTE: use simple text in case; test function wraps in dbl quotes +// #[case ("a")] +// #[case ("description")] +// #[case (" a description ")] +// #[case ("\ta description\twith tabs ")] +// fn can_parse_trans_descr(#[case] descr: &str) { + +// let quoted_descr = format!("\"{}\"", descr); +// let pairs = Parser::parse( +// Rule::trans_description, &quoted_descr) +// .unwrap_or_else(|e| panic!("{}", e)); + +// // Parsing succeeded; ensure at least 1 pair was returned +// assert!(pairs.len() > 0); +// } + + +// #[rstest] +// // NOTE: use simple text in case; test function wraps in dbl quotes +// #[case ("")] // empty - no text +// #[case (" ")] // empty - only ws +// #[case ("\ta description\twith tabs and\n a newline")] // newline is invalid +// #[should_panic(expected = "expected trans_")] +// fn verify_trans_descr_error(#[case] bad_descr: &str) { + +// let quoted_bad_descr = format!("\"{}\"", bad_descr); +// Parser::parse( +// Rule::trans_description, &quoted_bad_descr) +// .unwrap_or_else(|e| panic!("{}", e)); + +// // should never reach this code since all cases should result in panic +// println!("Test case '{}' should fail to parse!", quoted_bad_descr); +// assert!(false); +// } + + + +// #[rstest] +// // Verify transaction annotations: !, *, txn +// #[case ("2009-01-09 ! \"Bitcoin launch date\"")] +// #[case ("2009-01-09 * \"Bitcoin launch date\"")] +// #[case ("2009-01-09 txn \"Bitcoin launch date\"")] +// // whitespace variations +// #[case ("2010-01-09 * \"multi whitespace test\"")] +// #[case ("2011-01-09\t!\t\"tab test\"")] +// #[case ("2011-01-09\ttxn\t\"tab test\"")] +// #[case ("2012-01-09 * \"trailing tab test\"\t")] +// #[case ("2013-01-09 ! \"trailing spaces test\" ")] +// #[case ("2014-01-09 !
\"trailing tabs and spaces test\" \t \t\t ")] +// // #[ignore = "TBD: handle special chars in transaction description"] +// // #[case ("2009-01-09 ! \"Special chars in description: !@#$%^&*()-_=+\"")] +// fn can_parse_trans_header(#[case] base: &str) { + +// // NOTE: addons must end in \n to match rules +// let addons = [ +// "\n" +// ," \n" +// ,"\t\n" +// ," ; comment 123 ; \n" +// ,"\t;\tcomment 123 ;\t\n" +// ]; + +// for suffix in addons.iter() { + +// let tc = format!("{}{}", base, suffix); +// println!("Test case: {}", tc); + +// assert!(get_pairs(Rule::trans_header, &tc).len() > 0); +// } + +// } + +// #[rstest] +// #[case ("2016-01-28 * \"comment after description w/o whitespace\"; 10:01 am, xfer id 56aa57787199a73d29000650\n")] +// #[should_panic(expected = "expected trans_header")] +// fn verify_trans_header_error(#[case] bad_hdr: &str) { + +// let quoted_bad_descr = format!("\"{}\"", bad_hdr); +// Parser::parse( +// Rule::trans_header, &quoted_bad_descr) +// .unwrap_or_else(|e| panic!("{}", e)); + +// // should never reach this code since all cases should result in panic +// println!("Test case '{}' should fail to parse!", quoted_bad_descr); +// assert!(false); +// } + + +// } diff --git a/pta-parser/src/parsers/mod.rs b/pta-parser/src/parsers/mod.rs new file mode 100644 index 0000000..184fdf4 --- /dev/null +++ b/pta-parser/src/parsers/mod.rs @@ -0,0 +1,30 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root).
+// + + + +use pest_derive::*; + +pub mod generic { + + use super::*; + #[derive(Parser)] + #[grammar = "./grammars/base.pest"] + #[grammar = "./grammars/generic.pest"] + pub struct Parser; + +} + + +pub mod beancount { + + use super::*; + + #[derive(Parser)] + #[grammar = "./grammars/base.pest"] + #[grammar = "./grammars/beancount.pest"] + pub struct Parser; + +} \ No newline at end of file diff --git a/pta-types/Cargo.toml b/pta-types/Cargo.toml new file mode 100644 index 0000000..0fa4763 --- /dev/null +++ b/pta-types/Cargo.toml @@ -0,0 +1,23 @@ +# Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +# +# This project is licensed under the terms of the MIT license (cf. LICENSE file in root). + +[package] +name = "pta-types" +version.workspace = true +authors.workspace = true +# repository.workspace = true +edition.workspace = true + + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +bench = false + +[dependencies] +log = "0.4.20" +pest = "2.7.3" +pest_derive = "2.7.3" + +[dev-dependencies] +rstest = "0.19.0" diff --git a/pta-types/src/lib.rs b/pta-types/src/lib.rs new file mode 100644 index 0000000..6cc9d70 --- /dev/null +++ b/pta-types/src/lib.rs @@ -0,0 +1,22 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + +#[derive(Default, Clone)] +pub struct FilePosition { + pub line: usize, + pub col: usize // TODO: u16?
u32 is probably overkill +} + +#[derive(Default, Clone)] +pub struct ParserInfo { + pub position: FilePosition, +} + + +pub mod parsed_ledger; +pub use parsed_ledger::*; + +pub mod raw_transaction; +pub use raw_transaction::*; \ No newline at end of file diff --git a/pta-types/src/parsed_ledger.rs b/pta-types/src/parsed_ledger.rs new file mode 100644 index 0000000..ab6d3d1 --- /dev/null +++ b/pta-types/src/parsed_ledger.rs @@ -0,0 +1,19 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root). +// + +use super::*; +use raw_transaction; + + +#[derive(Default)] +pub struct ParsedLedger { + xns: Vec<raw_transaction::RawTransaction>, +} + +impl ParsedLedger { + pub fn add_transaction(self: &mut Self, xn: raw_transaction::RawTransaction) { + self.xns.push(xn); + } +} \ No newline at end of file diff --git a/pta-types/src/raw_transaction.rs b/pta-types/src/raw_transaction.rs new file mode 100644 index 0000000..ed20ea9 --- /dev/null +++ b/pta-types/src/raw_transaction.rs @@ -0,0 +1,32 @@ +// Copyright (C) 2023, AltaModa Technologies, LLC. All rights reserved. +// +// This project is licensed under the terms of the MIT license (cf. LICENSE file in root).
+// + +use super::*; + + +#[derive(Default, Clone)] +pub struct RawAccountDescriptor { + pub path: String, + pub pinfo: ParserInfo, +} + + +#[derive(Default, Clone)] +pub struct RawTransaction { + pub date: String, + pub anno: String, + pub desc: String, + pub postings: Vec<RawPosting>, + pub comment: String, + pub pinfo: ParserInfo, +} + +#[derive(Default, Clone)] +pub struct RawPosting { + pub acct: RawAccountDescriptor, + pub value: f64, + pub comment: String, + pub pinfo: ParserInfo, +} diff --git a/testdata/basic-ledger b/testdata/basic-ledger new file mode 100644 index 0000000..87cbe0c --- /dev/null +++ b/testdata/basic-ledger @@ -0,0 +1,31 @@ +;; Accounts +2001-09-11 open Assets +2001-09-11 open Assets:Cash +2001-09-12 close Assets + +* Commodities +1792-01-01 commodity USD +; name: "US Dollar" +; export: "CASH" +2009-01-09 commodity BTC +; name: "Bitcoin" +; export: "CryptoCurrency" +; price: "USD:coinbase/BTC-USD" + +;; Balance assertions +2001-09-11 balance Assets:Cash 123.456 USD +2001-09-11 balance Liabilities -123.456 USD + + +* Transactions +2009-01-09 ! "Bitcoin launch date" + assets:subacct1 1.0000 + equity -1.0000 + +2001-09-12 * "some transaction" + assets 1234.5678 + equity -1234.5678 + +; Transaction with a single posting - not a valid transaction, but should parse. +2001-12-31 txn "txn" + assets 0.00000000