From b00b19c83db1aac80cb22a4d25fff1fbecaa1c85 Mon Sep 17 00:00:00 2001 From: Kye Date: Thu, 12 Oct 2023 10:44:43 -0400 Subject: [PATCH] multiswarm pso prototype --- swarms_torch/autoregressive.py | 29 ++--- swarms_torch/fish_school.py | 12 +- swarms_torch/graph_cellular_automa.py | 11 +- swarms_torch/ma_agent.py | 3 +- swarms_torch/multi_swarm_pso.py | 152 ++++++++++++++++++++++++++ swarms_torch/neuronal_transformer.py | 3 +- swarms_torch/particle_swarm.py | 3 +- swarms_torch/queen_bee.py | 24 ++-- swarms_torch/spiral_optimization.py | 6 +- swarms_torch/transformer_pso.py | 13 +-- 10 files changed, 190 insertions(+), 66 deletions(-) create mode 100644 swarms_torch/multi_swarm_pso.py diff --git a/swarms_torch/autoregressive.py b/swarms_torch/autoregressive.py index 8c0467a..79d79ac 100644 --- a/swarms_torch/autoregressive.py +++ b/swarms_torch/autoregressive.py @@ -47,8 +47,7 @@ def align_right(t, lens, pad_id=0): pad_lens = seq_len - lens max_pad_len = pad_lens.amax() - batch_arange = torch.arange( - batch, device=device, dtype=torch.long)[..., None] + batch_arange = torch.arange(batch, device=device, dtype=torch.long)[..., None] prompt_len_arange = torch.arange(seq_len, device=device, dtype=torch.long) t = F.pad(t, (max_pad_len, 0), value=0) @@ -66,8 +65,7 @@ def top_p(logits, thres=0.9): cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) sorted_indices_to_remove = cum_probs > thres - sorted_indices_to_remove = F.pad( - sorted_indices_to_remove, (1, -1), value=False) + sorted_indices_to_remove = F.pad(sorted_indices_to_remove, (1, -1), value=False) sorted_logits[sorted_indices_to_remove] = float("-inf") return sorted_logits.scatter(1, sorted_indices, sorted_logits) @@ -120,12 +118,8 @@ def contrastive_decode_fn(expert_logits, amateur_logits, alpha=0.1, beta=0.5): class AutoregressiveWrapper(Module): def __init__( - self, - net, - ignore_index=-100, - pad_value=0, - mask_prob=0.0, - add_attn_z_loss=False): + self, net, ignore_index=-100, pad_value=0, mask_prob=0.0, add_attn_z_loss=False + ): super().__init__() self.pad_value = pad_value self.ignore_index = ignore_index @@ -206,7 +200,7 @@ def generate( if exists(cache): for inter in cache.attn_intermediates: inter.cached_kv = [ - t[..., -(max_seq_len - 1):, :] for t in inter.cached_kv + t[..., -(max_seq_len - 1) :, :] for t in inter.cached_kv ] logits, new_cache = self.net( @@ -247,7 +241,8 @@ def generate( amateur_logits.shape == logits.shape ), "logits dimension are not the same between amateur and expert model" logits = contrastive_decode_fn( - logits, amateur_logits, **amateur_contrastive_decode_kwargs) + logits, amateur_logits, **amateur_contrastive_decode_kwargs + ) if cache_kv and amateur.can_cache_kv: amateur_caches[i] = next_amateur_cache @@ -299,14 +294,12 @@ def forward(self, x, **kwargs): kwargs.update(self_attn_kv_mask=mask) logits, cache = self.net( - inp, return_intermediates=True, return_attn_z_loss=add_attn_z_loss, **kwargs) + inp, return_intermediates=True, return_attn_z_loss=add_attn_z_loss, **kwargs + ) loss = F.cross_entropy( - rearrange( - logits, - "b n c -> b c n"), - target, - ignore_index=ignore_index) + rearrange(logits, "b n c -> b c n"), target, ignore_index=ignore_index + ) if add_attn_z_loss: loss = loss + cache.attn_z_loss diff --git a/swarms_torch/fish_school.py b/swarms_torch/fish_school.py index 971e22a..7f7a219 100644 --- a/swarms_torch/fish_school.py +++ b/swarms_torch/fish_school.py @@ -73,10 +73,8 @@ def __init__( ): super().__init__() self.model = Transformer( - d_model=dim, - nhead=heads, - num_encoder_layers=depth, - num_decoder_layers=depth) + d_model=dim, nhead=heads, num_encoder_layers=depth, num_decoder_layers=depth + ) self.optimizer = Adam(self.parameters()) self.scheduler = ReduceLROnPlateau(self.optimizer, "min") @@ -104,8 +102,7 @@ def train(self, src, tgt, labels): # weights if self.complexity_regularization: # complexity regularization - loss += self.alpha * sum(p.pow(2.0).sum() - for p in self.model.parameters()) + loss += self.alpha * sum(p.pow(2.0).sum() for p in self.model.parameters()) # backpropagation loss.backward() @@ -214,8 +211,7 @@ def forward(self, src, tgt, labels): # with higher food if self.complex_school: for fish in self.fish: - neighbor = self.fish[torch.randint( - 0, len(self.fish), (1,)).item()] + neighbor = self.fish[torch.randint(0, len(self.fish), (1,)).item()] if neighbor.food > fish.food: fish.model.load_state_dict(neighbor.model.state_dict()) diff --git a/swarms_torch/graph_cellular_automa.py b/swarms_torch/graph_cellular_automa.py index 1687c09..00229d5 100644 --- a/swarms_torch/graph_cellular_automa.py +++ b/swarms_torch/graph_cellular_automa.py @@ -36,9 +36,8 @@ def __init__(self, input_dim, hidden_dim): super(WeightUpdateModel, self).__init__() self.mlp = nn.Sequential( - nn.Linear( - input_dim, hidden_dim), nn.ReLU(), nn.Linear( - hidden_dim, 1)) + nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, 1) + ) def forward(self, x): return self.mlp(x) @@ -52,8 +51,7 @@ def __init__(self, embedding_dim, hidden_dim): embedding_dim, hidden_dim, embedding_dim ) self.replication_model = ReplicationModel(embedding_dim, hidden_dim) - self.weight_update_model = WeightUpdateModel( - 2 * embedding_dim, hidden_dim) + self.weight_update_model = WeightUpdateModel(2 * embedding_dim, hidden_dim) def forward(self, node_embeddings, adjacency_matrix): # Update node embeddings using Graph Cellular Automata @@ -72,8 +70,7 @@ def forward(self, node_embeddings, adjacency_matrix): (updated_embeddings[i], updated_embeddings[j]) ) - edge_weights[i, j] = self.weight_update_model( - combined_embedding) + edge_weights[i, j] = self.weight_update_model(combined_embedding) return updated_embeddings, replication_decisions, edge_weights diff --git a/swarms_torch/ma_agent.py b/swarms_torch/ma_agent.py index a8c69d2..149576a 100644 --- a/swarms_torch/ma_agent.py +++ b/swarms_torch/ma_agent.py @@ -27,8 +27,7 @@ def __init__(self, env_name, num_agents): ) for _ in range(num_agents) ] - self.optimizers = [optim.Adam(agent.parameters()) - for agent in self.agents] + self.optimizers = [optim.Adam(agent.parameters()) for agent in self.agents] def step(self, agent_actions): rewards = [] diff --git a/swarms_torch/multi_swarm_pso.py b/swarms_torch/multi_swarm_pso.py new file mode 100644 index 0000000..5e6d8f8 --- /dev/null +++ b/swarms_torch/multi_swarm_pso.py @@ -0,0 +1,152 @@ +import torch +import random +import string + + +class MultiSwarmPSO: + """ + Multi-Swarm PSO Algorithm + + Parameters + ---------- + target_string : str + The target string to be generated + num_sub_swarms : int + The number of sub-swarms + num_particles_per_swarm : int + The number of particles per sub-swarm + max_iterations : int + The maximum number of iterations to run the algorithm + + Attributes + ---------- + target_string : str + The target string to be generated + num_sub_swarms : int + The number of sub-swarms + num_particles_per_swarm : int + The number of particles per sub-swarm + num_dimensions : int + The number of dimensions in the search space + max_iterations : int + The maximum number of iterations to run the algorithm + + Methods + ------- + generate_random_string() + Generates a random string of length num_dimensions + fitness_function(position) + Calculates the fitness of a given position + diversification_method(sub_swarms) + Adds a new sub-swarm if the number of sub-swarms is less than the maximum + optimize() + Runs the Multi-Swarm PSO algorithm + + References + ---------- + .. [1] https://www.researchgate.net/publication/221172800_Multi-swarm_Particle_Swarm_Optimization + + + Usage: + ------ + target_string = "hello world" + multi_swarm = MultiSwarm(target_string) + multi_swarm.optimize() + + + + """ + def __init__( + self, + target_string, + num_sub_swarms=5, + num_particles_per_swarm=20, + max_iterations=100, + ): + self.target_string = target_string + self.num_sub_swarms = num_sub_swarms + self.num_particles_per_swarm = num_particles_per_swarm + self.num_dimensions = len(target_string) + self.max_iterations = max_iterations + + def generate_random_string(self): + """ + Generates a random string of length num_dimensions + + """ + return "".join( + random.choice(string.ascii_lowercase + " ") + for _ in range(self.num_dimensions) + ) + + def fitness_function(self, position): + """Fitness function to be maximized""" + fitness = sum(a == b for a, b in zip(position, self.target_string)) + return fitness + + def diversification_method(self, sub_swarms): + """Diversification method to add a new sub-swarm if the number of sub-swarms is less than the maximum""" + if len(sub_swarms) < self.num_sub_swarms: + new_sub_swarm = [ + self.generate_random_string() + for _ in range(self.num_particles_per_swarm) + ] + sub_swarms.append(new_sub_swarm) + + def optimize(self): + """Optimizes the fitness function""" + sub_swarms = [ + [self.generate_random_string() for _ in range(self.num_particles_per_swarm)] + for _ in range(self.num_sub_swarms) + ] + + for iteration in range(self.max_iterations): + for sub_swarm in sub_swarms: + for particle in sub_swarm: + fitness = self.fitness_function(particle) + if fitness > 0: + index_to_change = random.randint(0, self.num_dimensions - 1) + new_char = random.choice(string.ascii_lowercase + " ") + new_position = list(particle) + new_position[index_to_change] = new_char + new_position = "".join(new_position) + particle = new_position + + self.diversification_method(sub_swarms) + + global_best_fitness = max( + self.fitness_function(particle) + for sub_swarm in sub_swarms + for particle in sub_swarm + ) + global_best_position = [ + particle + for sub_swarm in sub_swarms + for particle in sub_swarm + if self.fitness_function(particle) == global_best_fitness + ][0] + print( + f"Iteration {iteration}: Global Best Fitness = {global_best_fitness}, Global Best Position = {global_best_position}" + ) + + global_best_fitness = max( + self.fitness_function(particle) + for sub_swarm in sub_swarms + for particle in sub_swarm + ) + global_best_position = [ + particle + for sub_swarm in sub_swarms + for particle in sub_swarm + if self.fitness_function(particle) == global_best_fitness + ][0] + print( + f"Final Result: Global Best Fitness = {global_best_fitness}, Global Best Position = {global_best_position}" + ) + + +# Example usage +if __name__ == "__main__": + target_string = "hello world" + multi_swarm = MultiSwarm(target_string) + multi_swarm.optimize() diff --git a/swarms_torch/neuronal_transformer.py b/swarms_torch/neuronal_transformer.py index f4283c9..e862f94 100644 --- a/swarms_torch/neuronal_transformer.py +++ b/swarms_torch/neuronal_transformer.py @@ -153,8 +153,7 @@ def __init__(self, neuron_count, num_states, input_dim, output_dim, nhead): super(NNTransformer, self).__init__() # Initialize neurons and synapses - self.neurons = nn.ModuleList( - [Neuron(num_states) for _ in range(neuron_count)]) + self.neurons = nn.ModuleList([Neuron(num_states) for _ in range(neuron_count)]) self.synapses = nn.ModuleList( [ SynapseTransformer(input_dim, output_dim, nhead) diff --git a/swarms_torch/particle_swarm.py b/swarms_torch/particle_swarm.py index 2bdfdc3..fed29f2 100644 --- a/swarms_torch/particle_swarm.py +++ b/swarms_torch/particle_swarm.py @@ -130,5 +130,4 @@ def optimize( for _ in range(iterations): self.update() best_particle = self.global_best - print("Best Particle: ", "".join( - [chr(int(i)) for i in best_particle])) + print("Best Particle: ", "".join([chr(int(i)) for i in best_particle])) diff --git a/swarms_torch/queen_bee.py b/swarms_torch/queen_bee.py index b55393a..4a4f8b6 100644 --- a/swarms_torch/queen_bee.py +++ b/swarms_torch/queen_bee.py @@ -106,8 +106,7 @@ def _evolve(self): """ # Sort population by fitness - fitnesses = 1.0 / \ - torch.square(self.pool - self.target_gene).sum(dim=-1) + fitnesses = 1.0 / torch.square(self.pool - self.target_gene).sum(dim=-1) indices = fitnesses.sort(descending=True).indices self.pool, fitnesses = self.pool[indices], fitnesses[indices] @@ -131,13 +130,11 @@ def _evolve(self): self.queen_fitness, fitnesses = fitnesses[0], fitnesses[1:] # Deterministic tournament selection - contender_ids = torch.randn((self.pop_size - - 1, self.pop_size - - 1)).argsort(dim=- - 1)[..., : self.num_tournament_participants] + contender_ids = torch.randn((self.pop_size - 1, self.pop_size - 1)).argsort( + dim=-1 + )[..., : self.num_tournament_participants] participants, tournaments = self.pool[contender_ids], fitnesses[contender_ids] - top_winner = tournaments.topk( - 1, dim=-1, largest=True, sorted=False).indices + top_winner = tournaments.topk(1, dim=-1, largest=True, sorted=False).indices top_winner = top_winner.unsqueeze(-1).expand(-1, -1, self.gene_length) parents = participants.gather(1, top_winner).squeeze(1) @@ -146,7 +143,7 @@ def _evolve(self): self.pop_size - 1, self.gene_length ) self.pool = torch.cat( - (queen_parents[:, : self.gene_midpoint], parents[:, self.gene_midpoint:]), + (queen_parents[:, : self.gene_midpoint], parents[:, self.gene_midpoint :]), dim=-1, ) @@ -158,10 +155,8 @@ def _evolve(self): mutated_pool = torch.where(mutate_mask, self.pool + noise, self.pool) strong_mutate_mask = ( - torch.randn( - self.pool.shape).argsort( - dim=- - 1) < self.strong_num_code_mutate) + torch.randn(self.pool.shape).argsort(dim=-1) < self.strong_num_code_mutate + ) noise = torch.randint(0, 2, self.pool.shape) * 2 - 1 strong_mutated_pool = torch.where( strong_mutate_mask, self.pool + noise, self.pool @@ -180,8 +175,7 @@ def _check_convergence(self): """ Check if any of the solutions has achieved the goal """ - fitnesses = 1.0 / \ - torch.square(self.pool - self.target_gene).sum(dim=-1) + fitnesses = 1.0 / torch.square(self.pool - self.target_gene).sum(dim=-1) return (fitnesses == float("inf")).any().item() diff --git a/swarms_torch/spiral_optimization.py b/swarms_torch/spiral_optimization.py index 90a034d..5adc5a1 100644 --- a/swarms_torch/spiral_optimization.py +++ b/swarms_torch/spiral_optimization.py @@ -53,10 +53,8 @@ def __init__(self, goal: str = None, m: int = 10, k_max: int = 1000): # Initializing the search points and center randomly # Note: 32-126 is the ASCII range for all printable characters - self.points = torch.randint( - 32, 127, (self.m, self.n_dim), dtype=torch.float32) - self.center = torch.randint( - 32, 127, (self.n_dim,), dtype=torch.float32) + self.points = torch.randint(32, 127, (self.m, self.n_dim), dtype=torch.float32) + self.center = torch.randint(32, 127, (self.n_dim,), dtype=torch.float32) def _step_rate(self, k): """ diff --git a/swarms_torch/transformer_pso.py b/swarms_torch/transformer_pso.py index 2c52d31..dd59baa 100644 --- a/swarms_torch/transformer_pso.py +++ b/swarms_torch/transformer_pso.py @@ -30,8 +30,7 @@ class SimpleTransformer(nn.Module): def __init__(self, input_dim, d_model, nhead, num_layers, output_dim): super(SimpleTransformer, self).__init__() self.embedding = nn.Embedding(input_dim, d_model) - self.transformer = nn.Transformer( - d_model, nhead, num_layers, num_layers) + self.transformer = nn.Transformer(d_model, nhead, num_layers, num_layers) self.fc = nn.Linear(d_model, output_dim) def forward(self, x): @@ -104,11 +103,10 @@ def __init__( self.global_best_weight = global_best_weight # Representing particles using model parameters - param_size = sum(p.numel() - for p in model_constructor(*model_args).parameters()) + param_size = sum(p.numel() for p in model_constructor(*model_args).parameters()) self.particles = [ - self.model_constructor( - *model_args).to(device) for _ in range(n_particles)] + self.model_constructor(*model_args).to(device) for _ in range(n_particles) + ] self.velocities = [ torch.zeros((param_size,)).to(device) for _ in range(n_particles) ] @@ -154,8 +152,7 @@ def update(self): ) + self.global_best_weight * torch.rand_like(param) * ( self.global_best[name].to(self.device) - param.data ) - self.velocities[idx] += self.inertia * \ - self.velocities[idx] + delta + self.velocities[idx] += self.inertia * self.velocities[idx] + delta param.data += self.velocities[idx] def optimize(self, iterations=1000):