Skip to content

Commit

Permalink
Remove need for sort in new_with_metadata (apache#8855)
Browse files Browse the repository at this point in the history
BTreeMap gives stable iteration order, so we don't need to sort

Speeds up benchmarks in sql_planner.rs by 3-8%
  • Loading branch information
simonvandel authored Jan 15, 2024
1 parent 1dcdcd4 commit ff728d6
Showing 1 changed file with 5 additions and 11 deletions.
16 changes: 5 additions & 11 deletions datafusion/common/src/dfschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! DFSchema is an extended schema struct that DataFusion uses to provide support for
//! fields with optional relation names.
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap};
use std::convert::TryFrom;
use std::fmt::{Display, Formatter};
use std::hash::Hash;
Expand Down Expand Up @@ -135,8 +135,8 @@ impl DFSchema {
fields: Vec<DFField>,
metadata: HashMap<String, String>,
) -> Result<Self> {
let mut qualified_names = HashSet::new();
let mut unqualified_names = HashSet::new();
let mut qualified_names = BTreeSet::new();
let mut unqualified_names = BTreeSet::new();

for field in &fields {
if let Some(qualifier) = field.qualifier() {
Expand All @@ -148,14 +148,8 @@ impl DFSchema {
}
}

// check for mix of qualified and unqualified field with same unqualified name
// note that we need to sort the contents of the HashSet first so that errors are
// deterministic
let mut qualified_names = qualified_names
.iter()
.map(|(l, r)| (l.to_owned(), r.to_owned()))
.collect::<Vec<(&OwnedTableReference, &String)>>();
qualified_names.sort();
// Check for mix of qualified and unqualified fields with same unqualified name.
// The BTreeSet storage makes sure that errors are reported in deterministic order.
for (qualifier, name) in &qualified_names {
if unqualified_names.contains(name) {
return _schema_err!(SchemaError::AmbiguousReference {
Expand Down

0 comments on commit ff728d6

Please sign in to comment.