From e4f3a7deb433aefb0fd74387b3e45c4ebe54159a Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 10 Nov 2024 17:05:25 +0000 Subject: [PATCH] More things - Specification options - Add `duplicate_block` example - Formatting fixes --- checker/specification/build.rs | 92 ++++++++++++++------ checker/specification/test.rs | 15 ++-- parser/examples/duplicate_block.rs | 135 +++++++++++++++++++++++++++++ parser/examples/parse.rs | 108 ++++++++++++++++------- parser/examples/simple.rs | 12 +-- parser/src/block.rs | 3 +- parser/src/modules.rs | 2 +- src/cli.rs | 5 +- src/repl.rs | 2 +- 9 files changed, 296 insertions(+), 78 deletions(-) create mode 100644 parser/examples/duplicate_block.rs diff --git a/checker/specification/build.rs b/checker/specification/build.rs index f2cbe1e7..1b20193d 100644 --- a/checker/specification/build.rs +++ b/checker/specification/build.rs @@ -19,14 +19,18 @@ fn main() -> Result<(), Box> { if cfg!(feature = "staging") { let staging = read_to_string("./staging.md")?; - writeln!(&mut out, "mod staging {{ use super::check_errors; ").unwrap(); + writeln!(&mut out, "mod staging {{ ").unwrap(); + writeln!(&mut out, "use super::{{check_expected_diagnostics, TypeCheckOptions}}; ") + .unwrap(); markdown_lines_append_test_to_rust(staging.lines().enumerate(), &mut out)?; writeln!(&mut out, "}}").unwrap(); } if cfg!(feature = "all") { let to_implement = read_to_string("./to_implement.md")?; - writeln!(&mut out, "mod to_implement {{ use super::check_errors; ").unwrap(); + writeln!(&mut out, "mod to_implement {{ ").unwrap(); + writeln!(&mut out, "use super::{{check_expected_diagnostics, TypeCheckOptions}}; ") + .unwrap(); markdown_lines_append_test_to_rust(to_implement.lines().enumerate(), &mut out)?; writeln!(&mut out, "}}").unwrap(); } @@ -60,8 +64,20 @@ fn markdown_lines_append_test_to_rust( let heading = line.strip_prefix("####").unwrap().trim_start(); let test_title = heading_to_rust_identifier(heading); - let blocks = { - let mut blocks = Vec::new(); + pub struct File<'a> { + path: &'a str, + code: String, + } + + // pub struct Block { + // /// Vec for FS tests + // files: Vec, + // expected_diagnostics: Vec, + // options: Vec + // } + + let files = { + let mut files = Vec::::new(); let mut current_filename = None; for (_, line) in lines.by_ref() { // Also handles TSX @@ -74,10 +90,10 @@ fn markdown_lines_append_test_to_rust( for (_, line) in lines.by_ref() { if let Some(path) = line.strip_prefix("// in ") { if !code.trim().is_empty() { - blocks.push(( - current_filename.unwrap_or(DEFAULT_FILE_PATH), - mem::take(&mut code), - )); + files.push(File { + path: current_filename.unwrap_or(DEFAULT_FILE_PATH), + code: mem::take(&mut code), + }); } current_filename = Some(path); continue; @@ -88,40 +104,64 @@ fn markdown_lines_append_test_to_rust( code.push_str(line); code.push('\n') } - blocks.push((current_filename.unwrap_or(DEFAULT_FILE_PATH), code)); - blocks + files.push(File { path: current_filename.unwrap_or(DEFAULT_FILE_PATH), code }); + files }; - let errors = { - let mut errors = Vec::new(); + + let (expected_diagnostics, options) = { + let mut expected_diagnostics = Vec::new(); + let mut options = None::>; for (_, line) in lines.by_ref() { - if line.starts_with("#") { + if let (Some(args), false) = (line.strip_prefix("With "), options.is_some()) { + options = Some(args.split(',').collect()); + } else if line.starts_with("#") { panic!("block with no diagnostics or break between in {test_title}") - } else if line.starts_with('-') { - let error = - line.strip_prefix("- ").unwrap().replace('\\', "").replace('"', "\\\""); - errors.push(format!("\"{}\"", error)) - } else if !errors.is_empty() { + } else if let Some(diagnostic) = line.strip_prefix("-") { + let error = diagnostic.trim().replace('\\', "").replace('"', "\\\""); + expected_diagnostics.push(format!("\"{}\"", error)) + } else if !expected_diagnostics.is_empty() { break; } } - errors + (expected_diagnostics, options) }; - let errors = errors.join(", "); + let expected_diagnostics = expected_diagnostics.join(", "); let heading_idx = heading_idx + 1; - let code = blocks + // TODO don't allocate + let code_as_list = files .into_iter() - .map(|(path, content)| format!("(\"{path}\",r#\"{content}\"#),")) - .fold(String::new(), |mut acc, cur| { - acc.push_str(&cur); + .map(|File { path, code }| format!("(\"{path}\",r#\"{code}\"#),")) + .reduce(|mut acc, slice| { + acc.push_str(&slice); acc - }); + }) + .unwrap(); + + let options = if let Some(options) = options { + let arguments = options + .into_iter() + .map(|value| format!("{value}: true")) + .reduce(|mut acc, slice| { + acc.push_str(&slice); + acc.push_str(", "); + acc + }) + .unwrap(); + format!("Some(super::TypeCheckOptions {{ {arguments}, ..super::TypeCheckOptions::default() }})") + } else { + format!("None") + }; writeln!( out, "#[test] fn {test_title}() {{ - super::check_errors(\"{heading}\", {heading_idx}, &[{code}], &[{errors}]) + super::check_expected_diagnostics( + \"{heading}\", {heading_idx}, + &[{code_as_list}], &[{expected_diagnostics}], + {options} + ) }}", )?; } diff --git a/checker/specification/test.rs b/checker/specification/test.rs index 85166a18..10a662d5 100644 --- a/checker/specification/test.rs +++ b/checker/specification/test.rs @@ -11,13 +11,14 @@ use checker::{ diagnostics, source_map::{Nullable, SourceId}, synthesis::EznoParser, + TypeCheckOptions, }; // This is here as it is used in the included `/specification.rs` use parser::ASTNode; mod specification { - use super::check_errors; + use super::{check_expected_diagnostics, TypeCheckOptions}; // from build.rs include!(concat!(env!("OUT_DIR"), "/specification.rs")); @@ -37,12 +38,13 @@ const SIMPLE_DTS: Option<&str> = None; const IN_CI: bool = option_env!("CI").is_some(); /// Called by each test -fn check_errors( +fn check_expected_diagnostics( heading: &'static str, - _line: usize, + line: usize, // (Path, Content) code: &[(&'static str, &'static str)], expected_diagnostics: &[&'static str], + type_check_options: Option, ) { // let global_buffer = Arc::new(Mutex::new(String::new())); // let old_panic_hook = panic::take_hook(); @@ -59,10 +61,7 @@ fn check_errors( // }) // }); - // TODO could test these - let type_check_options = Default::default(); - - // eprintln!("{:?}", code); + let type_check_options = type_check_options.unwrap_or_default(); // let result = panic::catch_unwind(|| { @@ -125,7 +124,7 @@ fn check_errors( if diagnostics != expected_diagnostics { panic!( - "{}", + "In '{heading}' on line {line}, found\n{}", pretty_assertions::Comparison::new(expected_diagnostics, &diagnostics).to_string() ) } diff --git a/parser/examples/duplicate_block.rs b/parser/examples/duplicate_block.rs new file mode 100644 index 00000000..21183a2f --- /dev/null +++ b/parser/examples/duplicate_block.rs @@ -0,0 +1,135 @@ +use ezno_parser::{ + declarations::VariableDeclaration, + visiting::{Chain, ImmutableVariableOrProperty, VisitOptions, Visitor, Visitors}, + ASTNode, Declaration, Expression, Module, StatementOrDeclaration, VariableField, +}; +use std::collections::{HashMap, HashSet}; + +struct Offsets { + pub offsets: Vec, + /// TODO use &str references + pub top_level_variables: HashSet, + pub top_level_types: HashSet, +} + +/// TODO this could use visting right? +/// TODO abstract to library +/// TODO do for funtions and types +fn get_top_level_identifiers(m: &Module) -> (HashSet, HashSet) { + let (mut variables, mut types): (HashSet<_>, HashSet<_>) = Default::default(); + for item in &m.items { + match item { + StatementOrDeclaration::Declaration(Declaration::Variable(variable)) => { + match variable { + VariableDeclaration::ConstDeclaration { declarations, position: _ } => { + for declaration in declarations { + if let VariableField::Name(identifier) = declaration.name.get_ast_ref() + { + variables.insert(identifier.as_option_str().unwrap().to_owned()); + } + } + } + VariableDeclaration::LetDeclaration { declarations, position: _ } => { + for declaration in declarations { + if let VariableField::Name(identifier) = declaration.name.get_ast_ref() + { + variables.insert(identifier.as_option_str().unwrap().to_owned()); + } + } + } + } + } + StatementOrDeclaration::Declaration(Declaration::Function(function)) => { + variables.insert(function.on.name.identifier.as_option_str().unwrap().to_owned()); + } + _ => {} + } + } + (variables, types) +} + +fn main() { + let code = " +let x = 2; +let y = x + 2; +let z = 6; +" + .trim(); + + // function func() {{ return [x, z] }} + let module = Module::from_string(code.into(), Default::default()).unwrap(); + + let (top_level_variables, top_level_types) = get_top_level_identifiers(&module); + + let mut visitors = Visitors { + expression_visitors: vec![Box::new(NameReferenceFinder)], + statement_visitors: Default::default(), + variable_visitors: vec![Box::new(NameIndexFinder)], + block_visitors: Default::default(), + }; + + // eprintln!("variables={:#?}", (&top_level_variables, &top_level_types)); + + let mut offsets: Offsets = + Offsets { offsets: Default::default(), top_level_variables, top_level_types }; + + module.visit::( + &mut visitors, + &mut offsets, + &VisitOptions { visit_nested_blocks: true, reverse_statements: false }, + source_map::Nullable::NULL, + ); + + // TODO why is this backwards + // eprintln!("offsets={:#?}", offsets); + + offsets.offsets.sort_unstable(); + let mut rest = code.to_owned(); + for (idx, offset) in offsets.offsets.iter_mut().enumerate().rev() { + let current_offset = *offset as usize; + rest.insert_str(current_offset, "000"); + // need to ammed offset now string has been changed + *offset += ("000".len() * idx) as u32; + } + rest.push('\n'); + + let mut total = rest.clone(); + const SIZE: usize = 10; + total.reserve(rest.len() * (SIZE - 1)); + + for i in 1..SIZE { + let name = format!("{:03}", i); + for offset in offsets.offsets.iter().copied() { + let range = offset as usize..(offset as usize + 3); + rest.replace_range(range, &name); + } + + total.push_str(&rest); + } + + eprintln!("{}", total); +} + +/// TODO this could be collected in the same process as above +struct NameIndexFinder; + +impl<'a> Visitor, Offsets> for NameIndexFinder { + fn visit(&mut self, item: &ImmutableVariableOrProperty<'a>, data: &mut Offsets, chain: &Chain) { + if chain.len() == 1 && item.get_variable_name().is_some() { + data.offsets.push(item.get_position().end); + // data.insert(name.to_owned()); + } + } +} + +struct NameReferenceFinder; + +impl Visitor for NameReferenceFinder { + fn visit(&mut self, item: &Expression, data: &mut Offsets, _chain: &Chain) { + if let Expression::VariableReference(name, position) = item { + if data.top_level_variables.contains(name) { + data.offsets.push(position.end); + } + } + } +} diff --git a/parser/examples/parse.rs b/parser/examples/parse.rs index cd189381..ccd5b852 100644 --- a/parser/examples/parse.rs +++ b/parser/examples/parse.rs @@ -1,8 +1,10 @@ -use std::{collections::VecDeque, time::Instant}; +use std::{collections::VecDeque, path::Path, time::Instant}; use ezno_parser::{ASTNode, Comments, Module, ParseOptions, ToStringOptions}; use source_map::FileSystem; +type Files = source_map::MapFileStore; + fn main() -> Result<(), Box> { let mut args: VecDeque<_> = std::env::args().skip(1).collect(); let path = args.pop_front().ok_or("expected argument")?; @@ -18,20 +20,18 @@ fn main() -> Result<(), Box> { let display_keywords = args.iter().any(|item| item == "--keywords"); let extras = args.iter().any(|item| item == "--extras"); let partial_syntax = args.iter().any(|item| item == "--partial"); - let source_maps = args.iter().any(|item| item == "--source-map"); + let print_source_maps = args.iter().any(|item| item == "--source-map"); let timings = args.iter().any(|item| item == "--timings"); - let render_timings = args.iter().any(|item| item == "--render-timings"); let type_definition_module = args.iter().any(|item| item == "--type-definition-module"); let type_annotations = !args.iter().any(|item| item == "--no-type-annotations"); let top_level_html = args.iter().any(|item| item == "--top-level-html"); + let parse_imports = args.iter().any(|item| item == "--parse-imports"); let print_ast = args.iter().any(|item| item == "--ast"); - let render_output = args.iter().any(|item| item == "--render"); + let to_string_output = args.iter().any(|item| item == "--to-string"); let pretty = args.iter().any(|item| item == "--pretty"); - let now = Instant::now(); - // TODO temp const STACK_SIZE_MB: usize = 32; let parse_options = ParseOptions { @@ -52,14 +52,48 @@ fn main() -> Result<(), Box> { ..ParseOptions::default() }; - let mut fs = source_map::MapFileStore::::default(); - - let source = std::fs::read_to_string(path.clone())?; + let mut fs = Files::default(); + + let to_string_options = to_string_output.then(|| ToStringOptions { + expect_markers: true, + include_type_annotations: type_annotations, + pretty, + comments: if pretty { Comments::All } else { Comments::None }, + // 60 is temp + max_line_length: if pretty { 60 } else { u8::MAX }, + ..Default::default() + }); + + parse_path( + path.as_ref(), + timings, + parse_imports, + &parse_options, + print_ast, + print_source_maps, + &to_string_options, + display_keywords, + &mut fs, + ) +} +fn parse_path( + path: &Path, + timings: bool, + parse_imports: bool, + parse_options: &ParseOptions, + print_ast: bool, + print_source_maps: bool, + to_string_options: &Option, + display_keywords: bool, + fs: &mut Files, +) -> Result<(), Box> { + let source = std::fs::read_to_string(path)?; let source_id = fs.new_source_id(path.into(), source.clone()); - eprintln!("parsing {:?} bytes", source.len()); - let result = Module::from_string_with_options(source.clone(), parse_options, None); + eprintln!("parsing {:?} ({:?} bytes)", path.display(), source.len()); + let now = Instant::now(); + let result = Module::from_string_with_options(source.clone(), parse_options.clone(), None); match result { Ok((module, state)) => { @@ -70,45 +104,55 @@ fn main() -> Result<(), Box> { if print_ast { println!("{module:#?}"); } - if source_maps || render_output || render_timings { - let now = Instant::now(); - let to_string_options = ToStringOptions { - expect_markers: true, - include_type_annotations: type_annotations, - pretty, - comments: if pretty { Comments::All } else { Comments::None }, - // 60 is temp - max_line_length: if pretty { 60 } else { u8::MAX }, - ..Default::default() - }; + if let Some(to_string_options) = to_string_options { + let now = Instant::now(); let (output, source_map) = - module.to_string_with_source_map(&to_string_options, source_id, &fs); + module.to_string_with_source_map(to_string_options, source_id, fs); - if timings || render_timings { + if timings { eprintln!("ToString'ed in: {:?}", now.elapsed()); } - if source_maps { - let sm = source_map.unwrap().to_json(&fs); - println!("{output}\n{sm}"); - } - if render_output { - println!("{output}"); + + println!("{output}"); + if print_source_maps { + let sm = source_map.unwrap().to_json(fs); + println!("{sm}"); } } if display_keywords { - println!("{:?}", state.keyword_positions.unwrap()); + println!("{:?}", state.keyword_positions.as_ref()); } + if parse_imports { + for import in state.constant_imports.iter() { + // Don't reparse files (+ catches cycles) + let resolved_path = path.parent().unwrap().join(import); + if fs.get_paths().contains_key(&resolved_path) { + continue; + } + let _ = parse_path( + &resolved_path, + timings, + parse_imports, + parse_options, + print_ast, + print_source_maps, + to_string_options, + display_keywords, + fs, + )?; + } + } Ok(()) } Err(parse_err) => { let mut line_column = parse_err .position .with_source(source_id) - .into_line_column_span::(&fs); + .into_line_column_span::(fs); { // Editor are one indexed line_column.line_start += 1; diff --git a/parser/examples/simple.rs b/parser/examples/simple.rs index 6acbd9ec..46475a22 100644 --- a/parser/examples/simple.rs +++ b/parser/examples/simple.rs @@ -1,9 +1,9 @@ #[allow(unused)] -use ezno_parser::{ASTNode, Module, Expression}; +use ezno_parser::{ASTNode, Expression, Module}; fn main() { - let source = "'Hello World!'".to_owned(); - let parse_options = Default::default(); - let result = Expression::from_string_with_options(source.clone(), parse_options, Some(40)); - eprintln!("{result:#?}"); -} \ No newline at end of file + let source = "'Hello World!'".to_owned(); + let parse_options = Default::default(); + let result = Expression::from_string_with_options(source.clone(), parse_options, Some(40)); + eprintln!("{result:#?}"); +} diff --git a/parser/src/block.rs b/parser/src/block.rs index 91e13927..e37b0f20 100644 --- a/parser/src/block.rs +++ b/parser/src/block.rs @@ -396,7 +396,8 @@ pub(crate) fn parse_statements_and_declarations( if let TSXToken::EOS = kind { 1 } else { - let lines = state.line_starts.byte_indexes_crosses_lines(end as usize, next.0 as usize); + let lines = + state.line_starts.byte_indexes_crosses_lines(end as usize, next.0 as usize); lines.saturating_sub(1) } } else { diff --git a/parser/src/modules.rs b/parser/src/modules.rs index 3287a87b..777e575f 100644 --- a/parser/src/modules.rs +++ b/parser/src/modules.rs @@ -44,7 +44,7 @@ impl ASTNode for Module { state: &mut crate::ParsingState, options: &ParseOptions, ) -> ParseResult { - let start = reader.peek().map(|t| t.1.0).unwrap_or_default(); + let start = reader.peek().map(|t| t.1 .0).unwrap_or_default(); let span = Span { start, source: (), end: start + state.length_of_source }; let hashbang_comment = if let Some(crate::Token(TSXToken::HashBangComment(_), _)) = reader.peek() diff --git a/src/cli.rs b/src/cli.rs index fdd63f40..602ca8c4 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -182,8 +182,7 @@ fn run_checker( type_check_options: TypeCheckOptions, compact_diagnostics: bool, ) -> ExitCode { - let result = - check(entry_points, read_file, definition_file.as_deref(), type_check_options); + let result = check(entry_points, read_file, definition_file.as_deref(), type_check_options); let CheckOutput { diagnostics, module_contents, chronometer, types, .. } = result; @@ -287,7 +286,7 @@ pub fn run_cli