Skip to content

Commit

Permalink
added a greedy algorithm, a pruning procedure to remove 1-1-pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
junipertcy committed Jul 31, 2020
1 parent f3ad931 commit a3912d3
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 22 deletions.
38 changes: 34 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,40 @@ First things first, install the required libraries:
pip install -r requirements.txt
```

Now, try:
To warm up, try:
```python
python is_simplicial.py -k degs.txt -s sizes.txt
python is_simplicial.py -k datasets/00_degs.txt -s datasets/00_sizes.txt
```
By running the snippet several times, you may notice that the algorithm is not deterministic.
Indeed, we are building the state space tree with the backtracking algorithm,
and therefore sampling the ensemble that satisfies the joint degree sequence
(defined by `00_degs.txt` & `00_sizes.txt`). In other words, the joint sequence is simplicial!

It should tell you that the joint degree sequence defined by `degs.txt` & `sizes.txt` is simplicial!
However, currently, the code only works for very small systems.
The backtracking algorithm is an exhaustive search;
that is, all feasible solutions are considered and it will always find the optimal solution.
This means that, unfortunately, our sampling algorithm only works for very small systems.

We still lack a proper existence test to check whether a joint sequence is simplicial.
Luckily, when the sequence is simplicial,
we do find a greedy deterministic algorithm that picks up a simplicial instance!
This works on fairly large inputs.

For example,
```python
python is_simplicial.py -k datasets/01_degs.txt -s datasets/01_sizes.txt --greedy
```
The dataset is the [crime network dataset](https://github.com/jg-you/scm/blob/master/datasets/crime_facet_list.txt)
from the [Phys. Rev. E paper](https://doi.org/10.1103/PhysRevE.96.032312), having 551 nodes and 194 facets.

Moreover, we find that the greedy algorithm can proceed in two directions,
and sometimes only the "backward direction" works.

For example,
```python
python is_simplicial.py -k datasets/02_degs.txt -s datasets/02_sizes.txt --greedy --no-forward
```

Interesting? I think it is!

## MISC notes (to clean up later)
* The graphical [Erdős–Gallai theorem](https://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93Gallai_theorem).
96 changes: 81 additions & 15 deletions SimplicialTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,25 @@ def log_forbidden(self, name, reason_id):
}


def prune_ones(size_list, degree_list):
    """Remove matched 1-1 pairs from a size list and a degree list.

    Every entry equal to 1 in ``size_list`` is paired with one entry equal
    to 1 in ``degree_list`` and both are dropped. Entries are removed from
    the front, i.e. the first ones found in ``degree_list`` are the ones
    that disappear; the relative order of all remaining entries is kept.

    Parameters
    ----------
    size_list : list of int
        Facet sizes. Modified in place.
    degree_list : list of int
        Vertex degrees. Modified in place.

    Returns
    -------
    tuple of (list, list)
        The same two list objects that were passed in, after pruning.
        If ``size_list`` holds more ones than ``degree_list``, a warning is
        printed and both lists are returned untouched.
    """
    n_size_ones = size_list.count(1)
    n_degree_ones = degree_list.count(1)

    if n_size_ones > n_degree_ones:
        print("Too many 0-simplices. We cannot satisfy the inclusive constraint.")
        return size_list, degree_list

    # Slice assignment keeps the caller's list objects alive (callers may rely
    # on the in-place mutation) while avoiding one O(n) `remove` per pair.
    size_list[:] = [size for size in size_list if size != 1]

    remaining_to_drop = n_size_ones
    kept_degrees = []
    for degree in degree_list:
        if degree == 1 and remaining_to_drop > 0:
            remaining_to_drop -= 1
        else:
            kept_degrees.append(degree)
    degree_list[:] = kept_degrees

    return size_list, degree_list


def count_summary(seq):
    """Count how many times each value occurs in ``seq``.

    Parameters
    ----------
    seq : iterable of hashable
        Values to tally.

    Returns
    -------
    collections.defaultdict
        Mapping ``value -> number of occurrences``; looking up a value that
        never occurred yields 0.
    """
    counts = defaultdict(int)
    for value in seq:
        counts[value] += 1
    return counts


class SimplicialTest(SimplexRegistrar):
"""Base class for SimplicialCheck.
Expand Down Expand Up @@ -80,12 +99,15 @@ def update_deg_seq(self, facet, value):
def checkpoint_1(self):
    """Check that the sorted ``degree_list`` dominates the sorted ``deg_seq``.

    Both sequences are sorted in ascending order and subtracted element-wise.

    Returns
    -------
    bool-like
        True when every entry of ``sort(degree_list) - sort(deg_seq)`` is
        non-negative, False otherwise.
    """
    return np.all(np.sort(self.degree_list) - np.sort(self.deg_seq) >= 0)

def _break_symmetry(self):
def _break_symmetry(self, greedy=False, forward=True):
m = self.sorted_s.pop(0)
picked_facet, picked_facet_id = self.register(random.sample(range(self.n), k=m))
if greedy:
picked_facet, picked_facet_id = self.sample_simplex_greedy([], m, forward=forward)
else:
picked_facet, picked_facet_id = self.register(random.sample(range(self.n), k=m))
self.update_deg_seq(picked_facet, +1)
identifier = [picked_facet_id]
self.symmetry_breaker = picked_facet_id # Actually, `picked_facet_id` is always 0 here.
self.symmetry_breaker = picked_facet_id
return identifier

def ensure_valid_draw(self, identifier, size):
Expand All @@ -105,25 +127,70 @@ def ensure_valid_draw(self, identifier, size):

pass

@staticmethod
def count_summary(seq):
d = defaultdict(int)
for s in seq:
d[s] += 1
return d
def sample_simplex_greedy(self, identifier, size, forward=True):
# Greedily build one facet of `size` vertices by scanning vertex indices in a
# single direction, then register it.
#
# identifier: ids of the facets selected so far (forwarded to the two helpers below).
# size: number of vertices the new facet must contain.
# forward: if true, scan indices upward from 0; otherwise downward from self.n - 1.
#
# Returns the (picked_facet, picked_facet_id) pair produced by self.register.
# Raises NotImplementedError (after printing a warning) when the scan index
# leaves [0, self.n) before `size` vertices were collected.
#
# NOTE(review): block nesting is not recoverable from this flattened view; the
# comments below describe individual statements only.

# Second element of the joint sequence for the current selection; indexed by
# vertex below — presumably the per-vertex degrees used so far, TODO confirm.
deg = self.compute_joint_seq_from_identifier(identifier, sorted_deg=False)[1]
# Ids of already selected facets whose size is >= `size` (see get_selected_facet_ids).
larger_selected_simplex_ids = self.get_selected_facet_ids(identifier, size)

candidate_facet = []

if forward:
# Forward direction: `shift` is the vertex index under consideration, starting at 0.
shift = 0
while len(candidate_facet) < size:
if shift >= self.n:
print("This sequence may not be simplicial.")
raise NotImplementedError

# print(candidate_facet, shift)
if len(candidate_facet) == size - 1: # the last vertex to choose
# Skip vertices that would make the candidate a subset of an already selected
# facet (self.id2name[_id] is presumably that facet's vertex list — confirm).
for _id in larger_selected_simplex_ids:
while set(candidate_facet + [shift]).issubset(set(self.id2name[_id])):
shift += 1
continue
# print(deg, self._sorted_d, shift, candidate_facet)
# Accept the vertex only if one more use keeps it within self._sorted_d[shift].
# NOTE(review): `shift` may have been advanced past self.n - 1 by the skip loop
# above before this index is read — confirm an IndexError cannot occur here.
if deg[shift] + 1 <= self._sorted_d[shift]:
candidate_facet += [shift]
shift += 1
else:
# Backward direction: same procedure, scanning from the last vertex index down.
shift = self.n - 1
while len(candidate_facet) < size:
if shift < 0:
print("This sequence may not be simplicial.")
raise NotImplementedError

# print(candidate_facet, shift)
if len(candidate_facet) == size - 1: # the last vertex to choose
# Skip vertices that would make the candidate a subset of an already selected facet.
for _id in larger_selected_simplex_ids:
while set(candidate_facet + [shift]).issubset(set(self.id2name[_id])):
shift -= 1
continue
# print(deg, self._sorted_d, shift, candidate_facet)
# NOTE(review): if the skip loop drives `shift` below 0, the negative index
# would wrap around to the end of the sequence instead of failing — confirm.
if deg[shift] + 1 <= self._sorted_d[shift]:
candidate_facet += [shift]
shift -= 1
# Register the collected vertices as a facet and hand back what register returns.
picked_facet, picked_facet_id = self.register(candidate_facet)
# print(picked_facet, picked_facet_id)
return picked_facet, picked_facet_id

def sample_simplex(self, identifier, size):
def get_selected_facet_ids(self, identifier, size):
    """Return the ids of selected facets whose size is at least ``size``.

    Parameters
    ----------
    identifier : iterable of int
        Ids of the facets that are currently selected.
    size : int
        Minimum facet size to keep.

    Returns
    -------
    list of int
        Ids taken from ``identifier`` whose entry in ``self.facet_size_per_id``
        is ``>= size``, in ascending id order and without duplicates.
    """
    # Build the membership set once instead of scanning `identifier` per facet.
    selected = set(identifier)
    return [
        facet_id
        for facet_id, facet_size in enumerate(self.facet_size_per_id)
        if facet_id in selected and facet_size >= size
    ]

def sample_simplex(self, identifier, size, greedy=False, forward=True):
"""
Parameters
----------
identifier
size
greedy
forward
Returns
-------
"""
if greedy:
return self.sample_simplex_greedy(identifier, size, forward=forward)

# Here, we have a good criterion! We may not need to explore further if...
deg_sequence = np.array(self.compute_joint_seq_from_identifier(identifier)[1])
deg_sequence_goal = self._sorted_d
Expand All @@ -136,8 +203,7 @@ def sample_simplex(self, identifier, size):
self._backtrack_steps = 1
return list(), -1

larger_selected_simplex_ids = [index for index, i in enumerate(self.facet_size_per_id) if
(index in identifier and i >= size)]
larger_selected_simplex_ids = self.get_selected_facet_ids(identifier, size)

set_of_vertices = set(range(self.n))
picked_facet, picked_facet_id = self.register(random.sample(list(set_of_vertices), k=size))
Expand Down Expand Up @@ -168,7 +234,7 @@ def sample_simplex(self, identifier, size):

return picked_facet, picked_facet_id

def is_simplicial(self):
def is_simplicial(self, greedy=False, forward=True):
if max(self.degree_list) > self.m:
print("1. This can never be simplicial.") # TODO.... why??
return False
Expand All @@ -180,7 +246,7 @@ def is_simplicial(self):
return False
# TODO: there is a second part of the GR criterion, which is not coded yet.

identifier = self._break_symmetry()
identifier = self._break_symmetry(greedy=greedy, forward=forward)
if len(self.sorted_s) == 0:
if sorted(self.deg_seq, reverse=True) == self._sorted_d: # TODO: start from identifier
self.identifier = identifier
Expand All @@ -192,7 +258,7 @@ def is_simplicial(self):
if len(self.logbook) == self._len_logbook:
self._counter += 1
s = self.sorted_s.pop(0)
picked_facet, picked_facet_id = self.sample_simplex(identifier, s)
picked_facet, picked_facet_id = self.sample_simplex(identifier, s, greedy=greedy, forward=forward)
if len(picked_facet) == 0:
self.sorted_s = [s] + self.sorted_s
self._pull_the_plug(identifier, self._backtrack_steps)
Expand Down
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions datasets/01_degs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 1 1 2 1 1 1 1 1 1 1 1 1 5 1 1 1 1 1 1 2 1 4 2 1 1 1 1 1 1 1 2 1 2 1 1 1 1 8 3 1 1 2 1 2 6 5 2 2 1 1 3 2 1 2 1 1 2 3 1 1 1 1 1 1 1 1 2 1 2 2 4 1 1 4 1 3 1 1 1 4 2 1 1 1 1 1 1 1 3 2 1 1 3 7 2 1 2 1 1 2 2 1 2 1 1 2 2 2 14 1 3 1 1 2 1 4 1 3 1 1 1 1 2 2 2 1 1 2 2 2 2 2 2 2 1 1 1 1 3 1 1 1 1 1 3 1 2 1 1 2 1 2 1 2 2 1 2 1 3 1 1 1 1 1 1 2 1 3 1 1 1 3 2 1 1 2 1 1 1 1 1 2 1 3 1 1 2 5 1 1 1 2 2 2 3 2 2 2 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 2 2 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 2 2 1 2 1 1 2 1 2 1 1 2 1 1 2 4 2 1 3 2 1 1 1 1 1 1 3 1 1 3 2 1 2 1 2 1 3 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 4 2 1 1 2 2 1 2 1 1 1 2 1 3 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 2 3 3 1 1 2 1 2 1 2 1 2 1 2 2 1 1 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 1 1 2 2 1 1 1 1 1 1 1 2 1 1 1 1 2 2 1 2 2 1 1 1 1 2 1 1 2 1 2 3 4 2 4 1 1 1 1 2 1 1 1 1 1 1 1 5 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 1 1 1 1 1 3 1 2 1 1 1 1 2 1 1 1 1 2 3 2 1 1 1 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 2 2 1 2 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1 change: 1 addition & 0 deletions datasets/01_sizes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
25 22 18 17 14 12 11 11 11 10 10 9 9 9 9 9 9 9 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 6 6 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1
1 change: 1 addition & 0 deletions datasets/02_degs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 2 2 3 2
1 change: 1 addition & 0 deletions datasets/02_sizes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2 3 4 2
10 changes: 7 additions & 3 deletions is_simplicial.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import click
from SimplicialTest import SimplicialTest
from SimplicialTest import *


@click.command()
@click.option('-k', '--degree_seq_file', 'degree_sequence', type=click.File('r'), help='Path to degree sequence file.')
@click.option('-s', '--size_seq_file', 'size_sequence', type=click.File('r'), help='Path to size sequence file.')
def is_simplicial(degree_sequence, size_sequence):
@click.option('--greedy/--no-greedy', default=False, help='Enable the Havel–Hakimi-type recursive algorithm.')
@click.option('--forward/--no-forward', default=True,
help='[works only when greedy is on] Direction of the recursive algorithm.')
def is_simplicial(degree_sequence, size_sequence, greedy, forward):
degree_sequence = list(map(int, degree_sequence.read().replace("\n", "").split(" ")))
size_sequence = list(map(int, size_sequence.read().replace("\n", "").split(" ")))
size_sequence, degree_sequence = prune_ones(size_sequence, degree_sequence)
st = SimplicialTest(degree_sequence, size_sequence)
result = st.is_simplicial()
result = st.is_simplicial(greedy=greedy, forward=forward)
if result is True:
print(f"Yes, the joint sequence is simplicial. \nThe complex is: {st.identifier2facets(st.identifier)}")
else:
Expand Down

0 comments on commit a3912d3

Please sign in to comment.