Skip to content

Commit

Permalink
Remove OrderedDict usage and rely on the insertion-ordered nature of built-in dictionaries.
Browse files (browse the repository at this point in the history)
matthewwardrop committed Jul 11, 2023
1 parent 2c63b6a commit 26763ff
Showing 5 changed files with 33 additions and 46 deletions.
17 changes: 6 additions & 11 deletions docsite/docs/guides/model_specs.ipynb
Original file line number Diff line number Diff line change
@@ -238,18 +238,13 @@
"data": {
"text/plain": [
"{'column_names': ('Intercept', 'center(a)', 'b[T.B]', 'b[T.C]'),\n",
" 'column_indices': OrderedDict([('Intercept', 0),\n",
" ('center(a)', 1),\n",
" ('b[T.B]', 2),\n",
" ('b[T.C]', 3)]),\n",
" 'column_indices': {'Intercept': 0, 'center(a)': 1, 'b[T.B]': 2, 'b[T.C]': 3},\n",
" 'terms': [1, center(a), b],\n",
" 'term_indices': OrderedDict([(1, [0]), (center(a), [1]), (b, [2, 3])]),\n",
" 'term_slices': OrderedDict([(1, slice(0, 1, None)),\n",
" (center(a), slice(1, 2, None)),\n",
" (b, slice(2, 4, None))]),\n",
" 'term_variables': OrderedDict([(1, set()),\n",
" (center(a), {'a', 'center'}),\n",
" (b, {'b'})]),\n",
" 'term_indices': {1: [0], center(a): [1], b: [2, 3]},\n",
" 'term_slices': {1: slice(0, 1, None),\n",
" center(a): slice(1, 2, None),\n",
" b: slice(2, 4, None)},\n",
" 'term_variables': {1: set(), center(a): {'a', 'center'}, b: {'b'}},\n",
" 'variable_terms': {'center': {center(a)}, 'a': {center(a)}, 'b': {b}},\n",
" 'variable_indices': {'center': [1], 'a': [1], 'b': [2, 3]},\n",
" 'variables': {'a', 'b', 'center'},\n",
6 changes: 3 additions & 3 deletions formulaic/materializers/base.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
import itertools
import operator
from abc import abstractmethod
from collections import defaultdict, OrderedDict, namedtuple
from collections import defaultdict, namedtuple
from typing import (
Any,
Dict,
@@ -233,7 +233,7 @@ def _build_model_matrix(
# Step 2: Generate the columns which will be collated into the full matrix
cols = []
for term, scoped_terms in scoped_terms_for_terms:
scoped_cols = OrderedDict()
scoped_cols = {}
for scoped_term in scoped_terms:
if not scoped_term.factors:
scoped_cols[
@@ -876,7 +876,7 @@ def _get_columns_for_term(
Returns:
dict
"""
out = OrderedDict()
out = {}
for reverse_product in itertools.product(
*(factor.items() for factor in reversed(factors))
):
3 changes: 1 addition & 2 deletions formulaic/materializers/pandas.py
Original file line number Diff line number Diff line change
@@ -2,7 +2,6 @@

import functools
import itertools
from collections import OrderedDict
from typing import Any, Dict, List, Sequence, Set, Tuple, cast, TYPE_CHECKING

import numpy
@@ -123,7 +122,7 @@ def _encode_categorical(
def _get_columns_for_term(
self, factors: List[Dict[str, Any]], spec: ModelSpec, scale: float = 1
) -> Dict[str, Any]:
out = OrderedDict()
out = {}

names = [
":".join(reversed(product))
20 changes: 9 additions & 11 deletions formulaic/model_spec.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import warnings
from collections import defaultdict, OrderedDict
from collections import defaultdict
from dataclasses import dataclass, field, replace
from typing import (
Any,
@@ -168,15 +168,15 @@ def feature_names(self) -> Sequence[str]:
return self.column_names

@cached_property
def column_indices(self) -> OrderedDict[str, int]:
def column_indices(self) -> Dict[str, int]:
"""
An ordered mapping from column names to the column index in generated
model matrices.
"""
return OrderedDict([(name, i) for i, name in enumerate(self.column_names)])
return {name: i for i, name in enumerate(self.column_names)}

@property
def feature_indices(self) -> OrderedDict[str, int]:
def feature_indices(self) -> Dict[str, int]:
"""
A deprecated reference to `ModelSpec.column_indices`. Will be removed in
v1.0.0.
@@ -196,7 +196,7 @@ def terms(self) -> List[Term]:
return self.formula.root

@cached_property
def term_indices(self) -> OrderedDict[Term, List[int]]:
def term_indices(self) -> Dict[Term, List[int]]:
"""
An ordered mapping of `Term` instances to the generated column indices.
@@ -210,7 +210,7 @@ def term_indices(self) -> OrderedDict[Term, List[int]]:
"likely be resolved by using the `ModelSpec` instance attached "
"to the model matrix generated when calling `.get_model_matrix()`."
)
slices = OrderedDict()
slices = {}
start = 0
for row in self.structure:
end = start + len(row[2])
@@ -219,7 +219,7 @@ def term_indices(self) -> OrderedDict[Term, List[int]]:
return slices

@cached_property
def term_slices(self) -> OrderedDict[Term, slice]:
def term_slices(self) -> Dict[Term, slice]:
"""
An ordered mapping of `Term` instances to a slice that when used on
the columns of the model matrix will subsample the model matrix down to
@@ -229,9 +229,7 @@ def term_slices(self) -> OrderedDict[Term, slice]:
up elements of this mapping using the string representation of the
`Term`.
"""
return OrderedDict(
{k: slice(v[0], v[-1] + 1) for k, v in self.term_indices.items()}
)
return {k: slice(v[0], v[-1] + 1) for k, v in self.term_indices.items()}

@cached_property
def term_variables(self) -> Dict[Term, Set[Variable]]:
@@ -247,7 +245,7 @@ def term_variables(self) -> Dict[Term, Set[Variable]]:
"likely be resolved by using the `ModelSpec` instance attached "
"to the model matrix generated when calling `.get_model_matrix()`."
)
term_variables = OrderedDict()
term_variables = {}
start = 0
for row in self.structure:
end = start + len(row[2])
33 changes: 14 additions & 19 deletions tests/test_model_spec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections import OrderedDict
from pyexpat import model
import re

@@ -71,25 +70,21 @@ def test_attributes(self, model_spec):
assert (
model_spec.column_indices
== model_spec.feature_indices
== OrderedDict(
[
("Intercept", 0),
("a", 1),
("A[T.b]", 2),
("A[T.c]", 3),
("A[T.b]:a", 4),
("A[T.c]:a", 5),
]
)
)
assert model_spec.term_slices == OrderedDict(
[
("1", slice(0, 1)),
("a", slice(1, 2)),
("A", slice(2, 4)),
("A:a", slice(4, 6)),
]
== {
"Intercept": 0,
"a": 1,
"A[T.b]": 2,
"A[T.c]": 3,
"A[T.b]:a": 4,
"A[T.c]:a": 5,
}
)
assert model_spec.term_slices == {
"1": slice(0, 1),
"a": slice(1, 2),
"A": slice(2, 4),
"A:a": slice(4, 6),
}
assert model_spec.terms == ["1", "a", "A", "A:a"]
assert model_spec.term_variables == {
"1": set(),

0 comments on commit 26763ff

Please sign in to comment.