diff --git a/README.md b/README.md index 86d1e2d..e07f8cf 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,40 @@ First things first, install the required libraries: pip install -r requirements.txt ``` -Now, try: +To warm up, try: ```python -python is_simplicial.py -k degs.txt -s sizes.txt +python is_simplicial.py -k datasets/00_degs.txt -s datasets/00_sizes.txt ``` +By running the snippet several times, you may notice that the algorithm is not deterministic. +Indeed, we are building the state space tree with the backtracking algorithm, +and therefore sampling the ensemble that satisfies the joint degree sequence +(defined by `00_degs.txt` & `00_sizes.txt`). In other words, the joint sequence is simplicial! -It should tell you that the joint degree sequence defined by `degs.txt` & `sizes.txt` is simplicial! -However, currently, the code only works for very small systems. \ No newline at end of file +The backtracking algorithm is an exhaustive search; +that is, all feasible solutions are considered and it will always find the optimal solution. +This means that, unfortunately, our sampling algorithm only works for very small systems. + +We still lack a proper existence test to check whether a joint sequence is simplicial. +Luckily, when the sequence is simplicial, +we do find a greedy deterministic algorithm that picks up a simplicial instance! +This can work on fairly large inputs. + +For example, +```python +python is_simplicial.py -k datasets/01_degs.txt -s datasets/01_sizes.txt --greedy +``` +The dataset is the [crime network dataset](https://github.com/jg-you/scm/blob/master/datasets/crime_facet_list.txt) +from the [Phys. Rev. E paper](https://doi.org/10.1103/PhysRevE.96.032312), having 551 nodes and 194 facets. + +Moreover, we find that the greedy algorithm can go in two directions, +and sometimes only the "backward direction" works.
+ +For example, +```python +python is_simplicial.py -k datasets/02_degs.txt -s datasets/02_sizes.txt --greedy --no-forward +``` + +Interesting? I think it is! + +## MISC notes (to clean up later) +* The graphical [Erdős–Gallai theorem](https://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93Gallai_theorem). \ No newline at end of file diff --git a/SimplicialTest.py b/SimplicialTest.py index 1b38e00..951f6a5 100644 --- a/SimplicialTest.py +++ b/SimplicialTest.py @@ -35,6 +35,25 @@ def log_forbidden(self, name, reason_id): } +def prune_ones(size_list, degree_list): + _1 = count_summary(size_list)[1] + _2 = count_summary(degree_list)[1] + if _1 > _2: + print("Too many 0-simplices. We cannot satisfy the inclusive constraint.") + else: + for _ in range(_1): + size_list.remove(1) + degree_list.remove(1) + return size_list, degree_list + + +def count_summary(seq): + d = defaultdict(int) + for s in seq: + d[s] += 1 + return d + + class SimplicialTest(SimplexRegistrar): """Base class for SimplicialCheck. @@ -80,12 +99,15 @@ def update_deg_seq(self, facet, value): def checkpoint_1(self): return np.all(np.sort(self.degree_list) - np.sort(self.deg_seq) >= 0) - def _break_symmetry(self): + def _break_symmetry(self, greedy=False, forward=True): m = self.sorted_s.pop(0) - picked_facet, picked_facet_id = self.register(random.sample(range(self.n), k=m)) + if greedy: + picked_facet, picked_facet_id = self.sample_simplex_greedy([], m, forward=forward) + else: + picked_facet, picked_facet_id = self.register(random.sample(range(self.n), k=m)) self.update_deg_seq(picked_facet, +1) identifier = [picked_facet_id] - self.symmetry_breaker = picked_facet_id # Actually, `picked_facet_id` is always 0 here. 
+ self.symmetry_breaker = picked_facet_id return identifier def ensure_valid_draw(self, identifier, size): @@ -105,25 +127,70 @@ def ensure_valid_draw(self, identifier, size): pass - @staticmethod - def count_summary(seq): - d = defaultdict(int) - for s in seq: - d[s] += 1 - return d + def sample_simplex_greedy(self, identifier, size, forward=True): + deg = self.compute_joint_seq_from_identifier(identifier, sorted_deg=False)[1] + larger_selected_simplex_ids = self.get_selected_facet_ids(identifier, size) + + candidate_facet = [] + + if forward: + shift = 0 + while len(candidate_facet) < size: + if shift >= self.n: + print("This sequence may not be simplicial.") + raise NotImplementedError + + # print(candidate_facet, shift) + if len(candidate_facet) == size - 1: # the last vertex to choose + for _id in larger_selected_simplex_ids: + while set(candidate_facet + [shift]).issubset(set(self.id2name[_id])): + shift += 1 + continue + # print(deg, self._sorted_d, shift, candidate_facet) + if deg[shift] + 1 <= self._sorted_d[shift]: + candidate_facet += [shift] + shift += 1 + else: + shift = self.n - 1 + while len(candidate_facet) < size: + if shift < 0: + print("This sequence may not be simplicial.") + raise NotImplementedError + + # print(candidate_facet, shift) + if len(candidate_facet) == size - 1: # the last vertex to choose + for _id in larger_selected_simplex_ids: + while set(candidate_facet + [shift]).issubset(set(self.id2name[_id])): + shift -= 1 + continue + # print(deg, self._sorted_d, shift, candidate_facet) + if deg[shift] + 1 <= self._sorted_d[shift]: + candidate_facet += [shift] + shift -= 1 + picked_facet, picked_facet_id = self.register(candidate_facet) + # print(picked_facet, picked_facet_id) + return picked_facet, picked_facet_id - def sample_simplex(self, identifier, size): + def get_selected_facet_ids(self, identifier, size): + return [index for index, i in enumerate(self.facet_size_per_id) if (index in identifier and i >= size)] + + def 
sample_simplex(self, identifier, size, greedy=False, forward=True): """ Parameters ---------- identifier size + greedy + forward Returns ------- """ + if greedy: + return self.sample_simplex_greedy(identifier, size, forward=forward) + # Here, we have a good criterion! We may not need to explore further if... deg_sequence = np.array(self.compute_joint_seq_from_identifier(identifier)[1]) deg_sequence_goal = self._sorted_d @@ -136,8 +203,7 @@ def sample_simplex(self, identifier, size): self._backtrack_steps = 1 return list(), -1 - larger_selected_simplex_ids = [index for index, i in enumerate(self.facet_size_per_id) if - (index in identifier and i >= size)] + larger_selected_simplex_ids = self.get_selected_facet_ids(identifier, size) set_of_vertices = set(range(self.n)) picked_facet, picked_facet_id = self.register(random.sample(list(set_of_vertices), k=size)) @@ -168,7 +234,7 @@ def sample_simplex(self, identifier, size): return picked_facet, picked_facet_id - def is_simplicial(self): + def is_simplicial(self, greedy=False, forward=True): if max(self.degree_list) > self.m: print("1. This can never be simplicial.") # TODO.... why?? return False @@ -180,7 +246,7 @@ def is_simplicial(self): return False # TODO: there is a second part of the GR criterion, which is not coded yet. 
- identifier = self._break_symmetry() + identifier = self._break_symmetry(greedy=greedy, forward=forward) if len(self.sorted_s) == 0: if sorted(self.deg_seq, reverse=True) == self._sorted_d: # TODO: start from identifier self.identifier = identifier @@ -192,7 +258,7 @@ def is_simplicial(self): if len(self.logbook) == self._len_logbook: self._counter += 1 s = self.sorted_s.pop(0) - picked_facet, picked_facet_id = self.sample_simplex(identifier, s) + picked_facet, picked_facet_id = self.sample_simplex(identifier, s, greedy=greedy, forward=forward) if len(picked_facet) == 0: self.sorted_s = [s] + self.sorted_s self._pull_the_plug(identifier, self._backtrack_steps) diff --git a/degs.txt b/datasets/00_degs.txt similarity index 100% rename from degs.txt rename to datasets/00_degs.txt diff --git a/sizes.txt b/datasets/00_sizes.txt similarity index 100% rename from sizes.txt rename to datasets/00_sizes.txt diff --git a/datasets/01_degs.txt b/datasets/01_degs.txt new file mode 100644 index 0000000..784b829 --- /dev/null +++ b/datasets/01_degs.txt @@ -0,0 +1 @@ +1 1 1 2 1 1 1 1 1 1 1 1 1 5 1 1 1 1 1 1 2 1 4 2 1 1 1 1 1 1 1 2 1 2 1 1 1 1 8 3 1 1 2 1 2 6 5 2 2 1 1 3 2 1 2 1 1 2 3 1 1 1 1 1 1 1 1 2 1 2 2 4 1 1 4 1 3 1 1 1 4 2 1 1 1 1 1 1 1 3 2 1 1 3 7 2 1 2 1 1 2 2 1 2 1 1 2 2 2 14 1 3 1 1 2 1 4 1 3 1 1 1 1 2 2 2 1 1 2 2 2 2 2 2 2 1 1 1 1 3 1 1 1 1 1 3 1 2 1 1 2 1 2 1 2 2 1 2 1 3 1 1 1 1 1 1 2 1 3 1 1 1 3 2 1 1 2 1 1 1 1 1 2 1 3 1 1 2 5 1 1 1 2 2 2 3 2 2 2 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 2 2 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 2 2 1 2 1 1 2 1 2 1 1 2 1 1 2 4 2 1 3 2 1 1 1 1 1 1 3 1 1 3 2 1 2 1 2 1 3 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 4 2 1 1 2 2 1 2 1 1 1 2 1 3 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 3 3 1 1 2 1 2 1 2 1 2 1 2 2 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 1 1 2 2 1 1 1 1 1 1 1 2 1 1 1 1 2 2 1 2 2 1 1 1 1 2 1 1 2 1 2 3 4 2 4 1 1 1 1 2 1 1 1 1 1 1 1 5 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 1 1 1 1 
1 3 1 2 1 1 1 1 2 1 1 1 1 2 3 2 1 1 1 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 2 2 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 diff --git a/datasets/01_sizes.txt b/datasets/01_sizes.txt new file mode 100644 index 0000000..7e9e868 --- /dev/null +++ b/datasets/01_sizes.txt @@ -0,0 +1 @@ +25 22 18 17 14 12 11 11 11 10 10 9 9 9 9 9 9 9 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 6 6 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 diff --git a/datasets/02_degs.txt b/datasets/02_degs.txt new file mode 100644 index 0000000..2f98a81 --- /dev/null +++ b/datasets/02_degs.txt @@ -0,0 +1 @@ +2 2 2 3 2 diff --git a/datasets/02_sizes.txt b/datasets/02_sizes.txt new file mode 100644 index 0000000..a3aabb8 --- /dev/null +++ b/datasets/02_sizes.txt @@ -0,0 +1 @@ +2 3 4 2 diff --git a/is_simplicial.py b/is_simplicial.py index 8176aed..f32374f 100644 --- a/is_simplicial.py +++ b/is_simplicial.py @@ -1,15 +1,19 @@ import click -from SimplicialTest import SimplicialTest +from SimplicialTest import * @click.command() @click.option('-k', '--degree_seq_file', 'degree_sequence', type=click.File('r'), help='Path to degree sequence file.') @click.option('-s', '--size_seq_file', 'size_sequence', type=click.File('r'), help='Path to size sequence file.') -def is_simplicial(degree_sequence, size_sequence): +@click.option('--greedy/--no-greedy', default=False, help='Enable the Havel–Hakimi-type recursive algorithm.') +@click.option('--forward/--no-forward', default=True, + help='[works only when greedy is on] Direction of the recursive algorithm.') +def is_simplicial(degree_sequence, size_sequence, greedy, forward): degree_sequence = list(map(int, degree_sequence.read().replace("\n", "").split(" "))) 
size_sequence = list(map(int, size_sequence.read().replace("\n", "").split(" "))) + size_sequence, degree_sequence = prune_ones(size_sequence, degree_sequence) st = SimplicialTest(degree_sequence, size_sequence) - result = st.is_simplicial() + result = st.is_simplicial(greedy=greedy, forward=forward) if result is True: print(f"Yes, the joint sequence is simplicial. \nThe complex is: {st.identifier2facets(st.identifier)}") else: