diff --git a/.gitignore b/.gitignore index 6936990..98e5fcf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -/target +target **/*.rs.bk Cargo.lock diff --git a/datapond-derive/Cargo.toml b/datapond-derive/Cargo.toml new file mode 100644 index 0000000..216aa51 --- /dev/null +++ b/datapond-derive/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "datapond-derive" +version = "0.1.0" +authors = ["Vytautas Astrauskas "] +edition = "2018" + +[dependencies] +proc-macro-hack = "0.5" +datapond-macro = { path = "../datapond-macro" } + +[dev-dependencies] +trybuild = "1.0" +datafrog = "2" diff --git a/datapond-derive/src/lib.rs b/datapond-derive/src/lib.rs new file mode 100644 index 0000000..803c5bd --- /dev/null +++ b/datapond-derive/src/lib.rs @@ -0,0 +1,4 @@ +use proc_macro_hack::proc_macro_hack; + +#[proc_macro_hack] +pub use datapond_macro::datapond; diff --git a/datapond-derive/tests/pass/simple1.rs b/datapond-derive/tests/pass/simple1.rs new file mode 100644 index 0000000..2701ddd --- /dev/null +++ b/datapond-derive/tests/pass/simple1.rs @@ -0,0 +1,14 @@ +use datapond_derive::datapond; + +fn main() { + let inp = vec![(1, 2), (2, 3)]; + let out; + datapond! { + input inp(x: u32, y: u32) + output out(x: u32, y: u32) + out(x, y) :- inp(y, x). + }; + assert!(out.len() == 2); + assert!(out[0] == (2, 1)); + assert!(out[1] == (3, 2)); +} diff --git a/datapond-derive/tests/test.rs b/datapond-derive/tests/test.rs new file mode 100644 index 0000000..135782a --- /dev/null +++ b/datapond-derive/tests/test.rs @@ -0,0 +1,5 @@ +#[test] +fn tests() { + let runner = trybuild::TestCases::new(); + runner.pass("tests/pass/*.rs"); +} diff --git a/datapond-macro/Cargo.toml b/datapond-macro/Cargo.toml new file mode 100644 index 0000000..0e43391 --- /dev/null +++ b/datapond-macro/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "datapond-macro" +version = "0.1.0" +authors = ["Vytautas Astrauskas "] +edition = "2018" + +[lib] +proc-macro = true + +[dependencies] +datapond = { path = ".." } +proc-macro-hack = "0.5" diff --git a/datapond-macro/src/lib.rs b/datapond-macro/src/lib.rs new file mode 100644 index 0000000..dad8852 --- /dev/null +++ b/datapond-macro/src/lib.rs @@ -0,0 +1,7 @@ +use proc_macro::TokenStream; +use proc_macro_hack::proc_macro_hack; + +#[proc_macro_hack] +pub fn datapond(input: TokenStream) -> TokenStream { + datapond::generate_datafrog(input.into()).into() +} \ No newline at end of file diff --git a/src/data_structures.rs b/src/data_structures.rs new file mode 100644 index 0000000..c35599f --- /dev/null +++ b/src/data_structures.rs @@ -0,0 +1,48 @@ +use std::collections::HashMap; + +/// A map that tracks insertion order. +#[derive(Debug)] +pub struct OrderedMap +where + K: Eq + std::hash::Hash, +{ + key_order: Vec, + map: HashMap, +} + +impl OrderedMap { + pub fn len(&self) -> usize { + self.map.len() + } + pub fn insert(&mut self, k: K, v: V) { + assert!(self.map.insert(k.clone(), v).is_none()); + self.key_order.push(k); + } + pub fn get(&self, k: &K) -> Option<&V> { + self.map.get(k) + } + pub fn values<'a>(&'a self) -> Vec<&'a V> { + self.key_order.iter().map(|k| &self.map[k]).collect() + } +} + +impl std::iter::FromIterator<(K, V)> for OrderedMap { + fn from_iter>(iter: I) -> Self { + let mut s = Self { + key_order: Vec::new(), + map: HashMap::new(), + }; + for (k, v) in iter { + s.insert(k, v); + } + s + } +} + +impl std::ops::Index<&K> for OrderedMap { + type Output = V; + + fn index(&self, key: &K) -> &Self::Output { + &self.map[key] + } +} diff --git a/src/generator_new/ast.rs b/src/generator_new/ast.rs index 23cf7a3..fe3f440 100644 --- a/src/generator_new/ast.rs +++ b/src/generator_new/ast.rs @@ -34,7 +34,7 @@ //! let r = in.iter().map(|(y, x)| {(x, y)}); //! ``` -use std::collections::HashMap; +use crate::data_structures::OrderedMap; /// A Datalog variable. /// @@ -91,6 +91,12 @@ pub(crate) enum DVarTypes { }, } +impl std::convert::From> for DVarTypes { + fn from(types: Vec) -> Self { + DVarTypes::Tuple(types) + } +} + /// A Datafrog relation. #[derive(Debug)] pub(crate) struct RelationDecl { @@ -118,6 +124,25 @@ pub(crate) struct Variable { pub name: syn::Ident, } +impl Variable { + pub fn with_suffix(&self, suffix: &str) -> Self { + Self { + name: syn::Ident::new( + &format!("{}{}", self.name, suffix), + proc_macro2::Span::call_site(), + ), + } + } + pub fn with_counter(&self, counter: usize) -> Self { + Self { + name: syn::Ident::new( + &format!("{}{}", self.name, counter), + proc_macro2::Span::call_site(), + ), + } + } +} + /// An operation that reorders and potentially drops Datalog variables. /// /// It is encoded as a Datafrog `from_map`. @@ -171,20 +196,35 @@ pub(crate) struct FilterOp { expr: syn::Expr, } +/// An operation that inserts the relation into a variable. +#[derive(Debug)] +pub(crate) struct InsertOp { + /// The variable into which we want to insert the relation. + pub variable: Variable, + /// The relation to be inserted. + pub relation: Variable, +} + #[derive(Debug)] pub(crate) enum Operation { Reorder(ReorderOp), BindVar(BindVarOp), Join(JoinOp), Filter(FilterOp), + Insert(InsertOp), } /// A Datafrog iteration. #[derive(Debug)] pub(crate) struct Iteration { - pub relations: HashMap, - pub variables: HashMap, - pub operations: Vec, + pub relations: OrderedMap, + pub variables: OrderedMap, + /// Operations performed before entering the iteration. + pub pre_operations: Vec, + /// Operations performed in the body of the iteration. + pub body_operations: Vec, + /// Operations performed after exiting the iteration. + pub post_operations: Vec, } impl Iteration { @@ -198,17 +238,37 @@ impl Iteration { .into_iter() .map(|decl| (decl.var.name.clone(), decl)) .collect(), - operations: Vec::new(), + pre_operations: Vec::new(), + body_operations: Vec::new(), + post_operations: Vec::new(), } } + /// Convert a Datafrog relation to a Datafrog variable and return its identifier. + pub fn convert_relation_to_variable(&mut self, variable: &Variable) -> Variable { + let decl = &self.relations[&variable.name]; + let variable_decl = VariableDecl { + var: decl.var.with_counter(self.variables.len()), + typ: decl.typ.clone().into(), + is_output: false, + }; + let new_variable = variable_decl.var.clone(); + self.variables + .insert(new_variable.name.clone(), variable_decl); + self.pre_operations.push(Operation::Insert(InsertOp { + variable: new_variable.clone(), + relation: decl.var.clone(), + })); + new_variable + } + pub fn get_relation_var(&self, variable_name: &syn::Ident) -> Option { + self.relations + .get(variable_name) + .map(|decl| decl.var.clone()) + } pub fn get_variable(&self, variable_name: &syn::Ident) -> Variable { - if let Some(decl) = self.variables.get(variable_name) { - decl.var.clone() - } else { - self.relations[variable_name].var.clone() - } + self.variables[variable_name].var.clone() } pub fn add_operation(&mut self, operation: Operation) { - self.operations.push(operation); + self.body_operations.push(operation); } } diff --git a/src/generator_new/encode.rs b/src/generator_new/encode.rs index 961952e..f60e3cd 100644 --- a/src/generator_new/encode.rs +++ b/src/generator_new/encode.rs @@ -1,7 +1,7 @@ use crate::ast; use crate::generator_new::ast as gen; -fn encode(program: ast::Program) -> gen::Iteration { +pub(crate) fn encode(program: ast::Program) -> gen::Iteration { let mut relations = Vec::new(); let mut variables = Vec::new(); for decl in program.decls.values() { @@ -40,7 +40,11 @@ fn encode(program: ast::Program) -> gen::Iteration { if literal1.is_negated { unimplemented!(); } - let variable = iteration.get_variable(&literal1.predicate); + let variable = if let Some(variable) = iteration.get_relation_var(&literal1.predicate) { + iteration.convert_relation_to_variable(&variable) + } else { + iteration.get_variable(&literal1.predicate) + }; let args = literal1.args.clone(); // Retrieve the main variable for the head. @@ -111,13 +115,17 @@ mod tests { eprintln!("{}", tokens); let expected_tokens = TokenStream::from_str( r##" - let mut iteration = datafrog::Iteration::new(); - let inp = datafrog::Relation::from_vec:: <(u32, u32,)>(inp); - let out = iteration.variable:: <(u32, u32,)>("out"); - while iteration.changed() { - out.from_map(&inp, | &(y, x,)| (x, y,)); + { + let mut iteration = datafrog::Iteration::new(); + let var_inp = datafrog::Relation::from_vec(inp); + let var_out = iteration.variable:: <(u32, u32,)>("out"); + let var_inp1 = iteration.variable:: <(u32, u32,)>("inp1"); + var_inp1.insert(var_inp); + while iteration.changed() { + var_out.from_map(&var_inp1, | &(y, x,)| (x, y,)); + } + out = var_out.complete(); } - let out = out.complete(); "##, ) .unwrap(); diff --git a/src/generator_new/mod.rs b/src/generator_new/mod.rs index 7685554..90e8f32 100644 --- a/src/generator_new/mod.rs +++ b/src/generator_new/mod.rs @@ -1,3 +1,16 @@ +use proc_macro2::TokenStream; +use quote::ToTokens; + mod ast; mod encode; mod to_tokens; + +pub fn generate_datafrog(input: TokenStream) -> TokenStream { + let parsed_program = match syn::parse2(input) { + Ok(program) => program, + Err(err) => return TokenStream::from(err.to_compile_error()), + }; + let typechecked_program = crate::typechecker::typecheck(parsed_program).unwrap(); + let encoded_program = encode::encode(typechecked_program); + encoded_program.to_token_stream() +} \ No newline at end of file diff --git a/src/generator_new/to_tokens.rs b/src/generator_new/to_tokens.rs index a1824d4..df75364 100644 --- a/src/generator_new/to_tokens.rs +++ b/src/generator_new/to_tokens.rs @@ -1,5 +1,5 @@ use crate::generator_new::ast::*; -use proc_macro2::TokenStream; +use proc_macro2::{Span, TokenStream}; use quote::quote; use quote::ToTokens; @@ -68,8 +68,9 @@ impl ToTokens for DVarTypes { impl ToTokens for Variable { fn to_tokens(&self, tokens: &mut TokenStream) { - let name = &self.name; - tokens.extend(quote! {#name}); + let var_name = format!("var_{}", self.name); + let ident = syn::Ident::new(&var_name, Span::call_site()); + tokens.extend(quote! {#ident}); } } @@ -105,6 +106,15 @@ impl ToTokens for FilterOp { } } +impl ToTokens for InsertOp { + fn to_tokens(&self, tokens: &mut TokenStream) { + let InsertOp { variable, relation } = self; + tokens.extend(quote! { + #variable.insert(#relation); + }); + } +} + impl ToTokens for Operation { fn to_tokens(&self, tokens: &mut TokenStream) { match self { @@ -112,51 +122,60 @@ impl ToTokens for Operation { Operation::BindVar(op) => op.to_tokens(tokens), Operation::Join(op) => op.to_tokens(tokens), Operation::Filter(op) => op.to_tokens(tokens), + Operation::Insert(op) => op.to_tokens(tokens), } } } +fn operation_vec_to_tokens(operations: &Vec) -> TokenStream { + let mut tokens = TokenStream::new(); + for operation in operations { + operation.to_tokens(&mut tokens); + } + tokens +} + impl ToTokens for Iteration { fn to_tokens(&self, tokens: &mut TokenStream) { let mut declare_relations = TokenStream::new(); for relation in self.relations.values() { let vec_name = &relation.var.name; let var = relation.var.to_token_stream(); - let mut typ = TokenStream::new(); - for var_typ in &relation.typ { - typ.extend(quote! {#var_typ,}); - } declare_relations.extend(quote! { - let #var = datafrog::Relation::from_vec::<(#typ)>(#vec_name); + let #var = datafrog::Relation::from_vec(#vec_name); }); } let mut declare_variables = TokenStream::new(); let mut output_results = TokenStream::new(); for variable in self.variables.values() { let var = variable.var.to_token_stream(); - let var_name = var.to_string(); + let var_name = variable.var.name.to_string(); let typ = variable.typ.to_token_stream(); declare_variables.extend(quote! { let #var = iteration.variable::<#typ>(#var_name); }); if variable.is_output { + let new_var = &variable.var.name; output_results.extend(quote! { - let #var = #var.complete(); + #new_var = #var.complete(); }); } } - let mut operations = TokenStream::new(); - for operation in &self.operations { - operation.to_tokens(&mut operations); - } + let pre_operations = operation_vec_to_tokens(&self.pre_operations); + let body_operations = operation_vec_to_tokens(&self.body_operations); + let post_operations = operation_vec_to_tokens(&self.post_operations); tokens.extend(quote! { - let mut iteration = datafrog::Iteration::new(); - #declare_relations - #declare_variables - while iteration.changed() { - #operations + { + let mut iteration = datafrog::Iteration::new(); + #declare_relations + #declare_variables + #pre_operations + while iteration.changed() { + #body_operations + } + #post_operations + #output_results } - #output_results }); } } diff --git a/src/lib.rs b/src/lib.rs index 869eb84..3edb3e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,9 +2,11 @@ extern crate log; mod ast; +mod data_structures; mod generator; mod generator_new; mod parser; mod typechecker; pub use generator::generate_skeleton_datafrog; +pub use generator_new::generate_datafrog; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 74f87f5..56edc91 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -136,24 +136,29 @@ impl Parse for ast::RuleHead { impl Parse for ast::Rule { fn parse(input: ParseStream) -> syn::Result { let head = input.parse()?; - input.step(|cursor| { - let rest = match cursor.token_tree() { - Some((proc_macro2::TokenTree::Punct(ref punct), next)) - if punct.as_char() == ':' && punct.spacing() == proc_macro2::Spacing::Joint => - { - next - } - _ => return Err(cursor.error(":- expected")), - }; - match rest.token_tree() { - Some((proc_macro2::TokenTree::Punct(ref punct), next)) - if punct.as_char() == '-' => - { - Ok(((), next)) - } - _ => Err(cursor.error(":- expected")), - } - })?; + // FIXME: For some reason, when getting input from a procedural macro, + // a space is always inserted between `:` and `-`. Therefore, the parser + // needs to accept the variant with a space. + input.parse::()?; + input.parse::()?; + // input.step(|cursor| { + // let rest = match cursor.token_tree() { + // Some((proc_macro2::TokenTree::Punct(ref punct), next)) + // if punct.as_char() == ':' && punct.spacing() == proc_macro2::Spacing::Joint => + // { + // next + // } + // _ => return Err(cursor.error(":- expected")), + // }; + // match rest.token_tree() { + // Some((proc_macro2::TokenTree::Punct(ref punct), next)) + // if punct.as_char() == '-' => + // { + // Ok(((), next)) + // } + // _ => Err(cursor.error(":- expected")), + // } + // })?; let body: Punctuated = Punctuated::parse_separated_nonempty(input)?; // Allow trailing punctuation.