diff --git a/playground/silu_visualization.py b/playground/silu_visualization.py index 4d29ead..f9071a5 100644 --- a/playground/silu_visualization.py +++ b/playground/silu_visualization.py @@ -1,6 +1,5 @@ import matplotlib.pyplot as plt import numpy as np -from mpl_toolkits.mplot3d import Axes3D # SiLU (Sigmoid-weighted Linear Unit) activation function diff --git a/swarms_torch/__init__.py b/swarms_torch/__init__.py index 2f3b1a7..b4b2c1b 100644 --- a/swarms_torch/__init__.py +++ b/swarms_torch/__init__.py @@ -7,8 +7,6 @@ from swarms_torch.spiral_optimization import SPO from swarms_torch.multi_swarm_pso import MultiSwarmPSO from swarms_torch.transformer_pso import Particle, TransformerParticleSwarmOptimization -from swarms_torch.swarmalators.swarmalator_visualize import visualize_swarmalators -from swarms_torch.swarmalators.swarmalator_base import simulate_swarmalators __all__ = [ "ParticleSwarmOptimization", diff --git a/swarms_torch/ant_colony_swarm.py b/swarms_torch/ant_colony_swarm.py index fc8c6c6..65c7eb6 100644 --- a/swarms_torch/ant_colony_swarm.py +++ b/swarms_torch/ant_colony_swarm.py @@ -68,13 +68,15 @@ def fitness(self, solution): def update_pheromones(self): """Update pheromone levels""" for i, solution in enumerate(self.solutions): - self.pheromones[i] = (1 - self.evaporation_rate - ) * self.pheromones[i] + self.fitness(solution) + self.pheromones[i] = (1 - self.evaporation_rate) * self.pheromones[ + i + ] + self.fitness(solution) def choose_next_path(self): """Choose the next path based on the pheromone levels""" probabilities = (self.pheromones**self.alpha) * ( - (1.0 / (1 + self.pheromones))**self.beta) + (1.0 / (1 + self.pheromones)) ** self.beta + ) probabilities /= probabilities.sum() @@ -88,8 +90,8 @@ def optimize(self): # This is a placeholder. Actual implementation will define how # ants traverse the search space. solution = torch.randint( - 32, 127, (len(self.goal),), - dtype=torch.float32) # Random characters. + 32, 127, (len(self.goal),), dtype=torch.float32 + ) # Random characters. self.solutions.append(solution) self.update_pheromones() diff --git a/swarms_torch/autoregressive.py b/swarms_torch/autoregressive.py index 17c168a..e47c428 100644 --- a/swarms_torch/autoregressive.py +++ b/swarms_torch/autoregressive.py @@ -25,7 +25,6 @@ def cast_tuple(t, length=1): def eval_decorator(fn): - def inner(self, *args, **kwargs): was_training = self.training self.eval() @@ -48,8 +47,7 @@ def align_right(t, lens, pad_id=0): pad_lens = seq_len - lens max_pad_len = pad_lens.amax() - batch_arange = torch.arange(batch, device=device, dtype=torch.long)[..., - None] + batch_arange = torch.arange(batch, device=device, dtype=torch.long)[..., None] prompt_len_arange = torch.arange(seq_len, device=device, dtype=torch.long) t = F.pad(t, (max_pad_len, 0), value=0) @@ -67,8 +65,7 @@ def top_p(logits, thres=0.9): cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1) sorted_indices_to_remove = cum_probs > thres - sorted_indices_to_remove = F.pad(sorted_indices_to_remove, (1, -1), - value=False) + sorted_indices_to_remove = F.pad(sorted_indices_to_remove, (1, -1), value=False) sorted_logits[sorted_indices_to_remove] = float("-inf") return sorted_logits.scatter(1, sorted_indices, sorted_logits) @@ -111,7 +108,8 @@ def contrastive_decode_fn(expert_logits, amateur_logits, alpha=0.1, beta=0.5): cutoff = log(alpha) + expert_logits.amax(dim=-1, keepdim=True) diffs = (1 + beta) * expert_logits - beta * amateur_logits contrastive_decode_logits = diffs.masked_fill( - expert_logits < cutoff, -torch.finfo(expert_logits.dtype).max) + expert_logits < cutoff, -torch.finfo(expert_logits.dtype).max + ) return contrastive_decode_logits @@ -119,13 +117,9 @@ def contrastive_decode_fn(expert_logits, amateur_logits, alpha=0.1, beta=0.5): class AutoregressiveWrapper(Module): - - def __init__(self, - net, - ignore_index=-100, - pad_value=0, - mask_prob=0.0, - add_attn_z_loss=False): + def __init__( + self, net, ignore_index=-100, pad_value=0, mask_prob=0.0, add_attn_z_loss=False + ): super().__init__() self.pad_value = pad_value self.ignore_index = ignore_index @@ -144,20 +138,21 @@ def __init__(self, @torch.no_grad() @eval_decorator - def generate(self, - prompts, - seq_len, - eos_token=None, - temperature=1.0, - prompt_lens: Optional[Tensor] = None, - filter_logits_fn: Callable = top_k, - restrict_to_max_seq_len=True, - amateur_model: Optional[Union[Module, Tuple[Module]]] = None, - filter_kwargs: dict = dict(), - contrastive_decode_kwargs: Union[dict, Tuple[dict]] = dict( - beta=0.5, alpha=0.1), - cache_kv=True, - **kwargs): + def generate( + self, + prompts, + seq_len, + eos_token=None, + temperature=1.0, + prompt_lens: Optional[Tensor] = None, + filter_logits_fn: Callable = top_k, + restrict_to_max_seq_len=True, + amateur_model: Optional[Union[Module, Tuple[Module]]] = None, + filter_kwargs: dict = dict(), + contrastive_decode_kwargs: Union[dict, Tuple[dict]] = dict(beta=0.5, alpha=0.1), + cache_kv=True, + **kwargs + ): max_seq_len, device = self.max_seq_len, prompts.device prompts, ps = pack([prompts], "* n") @@ -205,15 +200,16 @@ def generate(self, if exists(cache): for inter in cache.attn_intermediates: inter.cached_kv = [ - t[..., -(max_seq_len - 1):, :] - for t in inter.cached_kv + t[..., -(max_seq_len - 1) :, :] for t in inter.cached_kv ] - logits, new_cache = self.net(x, - return_intermediates=True, - cache=cache, - seq_start_pos=seq_start_pos, - **kwargs) + logits, new_cache = self.net( + x, + return_intermediates=True, + cache=cache, + seq_start_pos=seq_start_pos, + **kwargs + ) if cache_kv and self.net.can_cache_kv: cache = new_cache @@ -225,27 +221,29 @@ def generate(self, if exists(amateur_model): for i, ( - amateur, - amateur_cache, - amateur_contrastive_decode_kwargs, + amateur, + amateur_cache, + amateur_contrastive_decode_kwargs, ) in enumerate( - zip(amateur_model, amateur_caches, - contrastive_decode_kwargs)): + zip(amateur_model, amateur_caches, contrastive_decode_kwargs) + ): amateur_logits, next_amateur_cache = amateur( x, return_intermediates=True, cache=amateur_cache, seq_start_pos=seq_start_pos, - **kwargs) + **kwargs + ) amateur_logits = amateur_logits[:, -1] assert amateur_logits.shape == logits.shape, ( "logits dimension are not the same between amateur and expert" - " model") + " model" + ) logits = contrastive_decode_fn( - logits, amateur_logits, - **amateur_contrastive_decode_kwargs) + logits, amateur_logits, **amateur_contrastive_decode_kwargs + ) if cache_kv and amateur.can_cache_kv: amateur_caches[i] = next_amateur_cache @@ -289,20 +287,20 @@ def forward(self, x, **kwargs): if self.mask_prob > 0.0: rand = torch.randn(inp.shape, device=x.device) rand[:, 0] = -torch.finfo( - rand.dtype).max # first token should not be masked out + rand.dtype + ).max # first token should not be masked out num_mask = min(int(seq * self.mask_prob), seq - 1) indices = rand.topk(num_mask, dim=-1).indices mask = ~torch.zeros_like(inp).scatter(1, indices, 1.0).bool() kwargs.update(self_attn_kv_mask=mask) - logits, cache = self.net(inp, - return_intermediates=True, - return_attn_z_loss=add_attn_z_loss, - **kwargs) + logits, cache = self.net( + inp, return_intermediates=True, return_attn_z_loss=add_attn_z_loss, **kwargs + ) - loss = F.cross_entropy(rearrange(logits, "b n c -> b c n"), - target, - ignore_index=ignore_index) + loss = F.cross_entropy( + rearrange(logits, "b n c -> b c n"), target, ignore_index=ignore_index + ) if add_attn_z_loss: loss = loss + cache.attn_z_loss diff --git a/swarms_torch/cellular_transformer.py b/swarms_torch/cellular_transformer.py index c6e4be0..2a7dbc1 100644 --- a/swarms_torch/cellular_transformer.py +++ b/swarms_torch/cellular_transformer.py @@ -3,7 +3,6 @@ class TransformerCell(nn.Module): - def __init__( self, input_dim, @@ -12,9 +11,9 @@ def __init__( neighborhood_size=3, ): super(TransformerCell, self).__init__() - self.transformer = nn.Transformer(input_dim, - nhead=nhead, - num_encoder_layers=num_layers) + self.transformer = nn.Transformer( + input_dim, nhead=nhead, num_encoder_layers=num_layers + ) self.neighborhood_size = neighborhood_size def forward(self, x, neigbors): @@ -57,7 +56,8 @@ class CellularSwarm(nn.Module): def __init__(self, cell_count, input_dim, nhead, time_steps=4): super(CellularSwarm, self).__init__() self.cells = nn.ModuleList( - [TransformerCell(input_dim, nhead) for _ in range(cell_count)]) + [TransformerCell(input_dim, nhead) for _ in range(cell_count)] + ) self.time_steps = time_steps def forward(self, x): diff --git a/swarms_torch/fish_school.py b/swarms_torch/fish_school.py index 0fd6996..7f7a219 100644 --- a/swarms_torch/fish_school.py +++ b/swarms_torch/fish_school.py @@ -72,10 +72,9 @@ def __init__( alpha=0.1, ): super().__init__() - self.model = Transformer(d_model=dim, - nhead=heads, - num_encoder_layers=depth, - num_decoder_layers=depth) + self.model = Transformer( + d_model=dim, nhead=heads, num_encoder_layers=depth, num_decoder_layers=depth + ) self.optimizer = Adam(self.parameters()) self.scheduler = ReduceLROnPlateau(self.optimizer, "min") @@ -97,15 +96,13 @@ def train(self, src, tgt, labels): outputs = self.model(src, tgt) # cross entropy loss - loss = CrossEntropyLoss()(outputs.view(-1, outputs.size(-1)), - labels.view(-1)) + loss = CrossEntropyLoss()(outputs.view(-1, outputs.size(-1)), labels.view(-1)) # complexity regularization by adding the sum of the squares of the # weights if self.complexity_regularization: # complexity regularization - loss += self.alpha * sum( - p.pow(2.0).sum() for p in self.model.parameters()) + loss += self.alpha * sum(p.pow(2.0).sum() for p in self.model.parameters()) # backpropagation loss.backward() @@ -214,8 +211,7 @@ def forward(self, src, tgt, labels): # with higher food if self.complex_school: for fish in self.fish: - neighbor = self.fish[torch.randint(0, len(self.fish), - (1,)).item()] + neighbor = self.fish[torch.randint(0, len(self.fish), (1,)).item()] if neighbor.food > fish.food: fish.model.load_state_dict(neighbor.model.state_dict()) @@ -238,8 +234,9 @@ def predict(self, src, tgt): averages outputs of the top peforming models """ - top_fish = sorted(self.fish, key=lambda f: f.food, - reverse=True)[:self.num_top_fish] + top_fish = sorted(self.fish, key=lambda f: f.food, reverse=True)[ + : self.num_top_fish + ] self.model.eval() diff --git a/swarms_torch/graph_cellular_automa.py b/swarms_torch/graph_cellular_automa.py index c9c710f..00229d5 100644 --- a/swarms_torch/graph_cellular_automa.py +++ b/swarms_torch/graph_cellular_automa.py @@ -3,7 +3,6 @@ class GraphCellularAutomata(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim): super(GraphCellularAutomata, self).__init__() @@ -18,7 +17,6 @@ def forward(self, x): class ReplicationModel(nn.Module): - def __init__(self, input_dim, hidden_dim): super(ReplicationModel, self).__init__() @@ -34,27 +32,26 @@ def forward(self, x): class WeightUpdateModel(nn.Module): - def __init__(self, input_dim, hidden_dim): super(WeightUpdateModel, self).__init__() - self.mlp = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU(), - nn.Linear(hidden_dim, 1)) + self.mlp = nn.Sequential( + nn.Linear(input_dim, hidden_dim), nn.ReLU(), nn.Linear(hidden_dim, 1) + ) def forward(self, x): return self.mlp(x) class NDP(nn.Module): - def __init__(self, embedding_dim, hidden_dim): super(NDP, self).__init__() - self.gc_automata = GraphCellularAutomata(embedding_dim, hidden_dim, - embedding_dim) + self.gc_automata = GraphCellularAutomata( + embedding_dim, hidden_dim, embedding_dim + ) self.replication_model = ReplicationModel(embedding_dim, hidden_dim) - self.weight_update_model = WeightUpdateModel(2 * embedding_dim, - hidden_dim) + self.weight_update_model = WeightUpdateModel(2 * embedding_dim, hidden_dim) def forward(self, node_embeddings, adjacency_matrix): # Update node embeddings using Graph Cellular Automata @@ -70,10 +67,10 @@ def forward(self, node_embeddings, adjacency_matrix): for i in range(num_nodes): for j in range(num_nodes): combined_embedding = torch.cat( - (updated_embeddings[i], updated_embeddings[j])) + (updated_embeddings[i], updated_embeddings[j]) + ) - edge_weights[i, - j] = self.weight_update_model(combined_embedding) + edge_weights[i, j] = self.weight_update_model(combined_embedding) return updated_embeddings, replication_decisions, edge_weights diff --git a/swarms_torch/hivemind_swarm_transformer.py b/swarms_torch/hivemind_swarm_transformer.py new file mode 100644 index 0000000..dba27cc --- /dev/null +++ b/swarms_torch/hivemind_swarm_transformer.py @@ -0,0 +1,144 @@ +import torch +from torch import nn +from zeta.structs.transformer import ( + Decoder, + Transformer, +) + + +class HivemindTransformer(nn.Module): + def __init__( + self, + dim: int = None, + max_seq_len: int = None, + depth: int = None, + heads: int = None, + dim_head: int = None, + num_tokens: int = None, + ): + super(HivemindTransformer, self).__init__() + self.dim = dim + self.max_seq_len = max_seq_len + self.depth = depth + self.heads = heads + self.dim_head = dim_head + self.num_tokens = num_tokens + + self.model = Transformer( + num_tokens=num_tokens, + max_seq_len=max_seq_len, + attn_layers=Decoder( + dim=dim, + depth=depth, + dim_head=dim_head, + heads=heads, + ), + ) + + def forward(self, x): + return self.model(x) + + +class HivemindSwarm(nn.Module): + """ + HiveMind Swarm Transformer + + This is a transformer that is composed of a swarm of transformers where each transformer shares the same weights. + + Args: + dim: dimension of the model + max_seq_len: maximum sequence length + depth: depth of the model + heads: number of heads + dim_head: dimension of each head + num_models: number of models in the swarm + base_transformer: the base transformer to be used in the swarm + + + Example:: + model = HivemindSwarm( + dim=512, + max_seq_len=1024, + depth=6, + heads=8, + dim_head=64, + num_models=4, + ) + + x = torch.randn(1, 1024, 512) + y = model(x) + print(y.shape) + + + """ + + def __init__( + self, + dim: int = None, + max_seq_len: int = None, + num_tokens: int = None, + depth: int = None, + heads: int = None, + dim_head: int = None, + num_models: int = 1, + **kwargs, + ): + super(HivemindSwarm, self).__init__() + + self.dim = dim + self.max_seq_len = max_seq_len + self.depth = depth + self.heads = heads + self.num_tokens = num_tokens + self.dim_head = dim_head + self.num_models = num_models + self.base_transformer = HivemindTransformer( + dim=dim, + num_tokens=num_tokens, + max_seq_len=max_seq_len, + depth=depth, + heads=heads, + dim_head=dim_head, + ) + # Create a list of transformers sharing the same weights + self.experts = nn.ModuleList([self.base_transformer for _ in range(num_models)]) + + # Gating mechniams allows the model to dynamically weight the contribution of each transformer + # in the swarm. This is done by learning a weight for each transformer and then using a softmax + # to normalize the weights. + self.gate = nn.Linear(num_models, num_models) + self.gate_activation = nn.Softmax(dim=-1) + self.gate_bias = nn.Parameter(torch.zeros(num_models)) + + def forward(self, x): + logits = [] + for expert in self.experts: + output = expert(x) + logits.append(output) + # Run each transformer on the input + # outputs = [expert(x) for expert in self.experts] + + # stack outputs + outputs = torch.stack(logits, dim=1) + + # Compute the gate + gate = self.gate_activation(self.gate_bias + self.gate(outputs)) + + # Weight the outputs + outputs = torch.sum(outputs * gate.unsqueeze(-1), dim=1) + return outputs + + +model = HivemindSwarm( + dim=512, + max_seq_len=1024, + num_tokens=20000, + depth=6, + heads=8, + dim_head=64, + num_models=4, +) + +x = torch.randn(1, 1024, 512) +y = model(x) +print(y.shape) diff --git a/swarms_torch/ma_agent.py b/swarms_torch/ma_agent.py index c224079..149576a 100644 --- a/swarms_torch/ma_agent.py +++ b/swarms_torch/ma_agent.py @@ -5,9 +5,7 @@ class MAgent: - class Agent(nn.Module): - def __init__(self, input_dim, output_dim): super().__init__() self.policy = nn.Sequential( @@ -21,17 +19,15 @@ def forward(self, state): return self.policy(state) class MultiGymEnvironment: - def __init__(self, env_name, num_agents): self.envs = [gym.make(env_name) for _ in range(num_agents)] self.agents = [ - MAgent.Agent(self.envs[0].observation_space.shape[0], - self.envs[0].action_space.n) + MAgent.Agent( + self.envs[0].observation_space.shape[0], self.envs[0].action_space.n + ) for _ in range(num_agents) ] - self.optimizers = [ - optim.Adam(agent.parameters()) for agent in self.agents - ] + self.optimizers = [optim.Adam(agent.parameters()) for agent in self.agents] def step(self, agent_actions): rewards = [] @@ -53,10 +49,12 @@ def train(self, epochs=1000): ] rewards = self.step(actions) - for agent, optimizer, reward in zip(self.agents, - self.optimizers, rewards): - loss = (-torch.log(agent(torch.FloatTensor(states))) * - reward) # Example loss function + for agent, optimizer, reward in zip( + self.agents, self.optimizers, rewards + ): + loss = ( + -torch.log(agent(torch.FloatTensor(states))) * reward + ) # Example loss function optimizer.zero_grad() loss.backward() optimizer.step() diff --git a/swarms_torch/moe/README.md b/swarms_torch/moe/README.md new file mode 100644 index 0000000..d695050 --- /dev/null +++ b/swarms_torch/moe/README.md @@ -0,0 +1,16 @@ +# Various MOE Algorithms + +| Technique Category | Specific Technique | Description | Applications | +|----------------------|--------------------------|-------------------------------------------------------------------------------------------------------|----------------------------------------------| +| **Routing Mechanisms** | Gating Networks | Determines which expert handles which part of the input, typically using learnable parameters. | Used in nearly all MoE models for efficient expert utilization. | +| | Top-k Gating | Routes each input to the top k experts based on gating scores, promoting specialization. | Enhances capacity and efficiency in large-scale models. | +| | Sparse Gating | Routes inputs to a small subset of experts, ensuring sparse connectivity for scalability. | Useful in scaling MoE models for high-dimensional data. | +| **Expert Architecture** | Feedforward Networks | Standard architecture for individual experts, consisting of one or more fully connected layers. | Common in basic MoE implementations for simple tasks. | +| | Convolutional Experts | Experts use convolutional layers, ideal for spatial data like images. | Applied in computer vision tasks within MoE frameworks. | +| | Recurrent Experts | Utilizes RNNs or LSTMs for experts, suitable for sequential data. | Effective in NLP and time-series analysis in MoE models. | +| **Load Balancing** | Auxiliary Loss | An additional loss term to encourage even distribution of workload among experts. | Addresses the load imbalance issue in MoE models. | +| | Capacity Loss | Penalizes over-utilization of any single expert, promoting equal usage. | Further mitigates load imbalance in large MoE networks. | +| **Training Strategies** | Gradient Blending | Combines gradients from different experts efficiently for backpropagation. | Essential for stable and efficient training of MoE models. | +| | Expert Dropout | Randomly drops experts during training to promote robustness and prevent overfitting. | Increases generalization and model robustness. | +| **Optimization Techniques** | Expert Pruning | Removes less utilized experts from the model post-training for efficiency. | Reduces computational overhead in deployed models. | +| | Adaptive Computation | Dynamically adjusts the computational effort based on the input complexity. | Optimizes computational resources during inference. | diff --git a/swarms_torch/moe/__init__.py b/swarms_torch/moe/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swarms_torch/multi_swarm_pso.py b/swarms_torch/multi_swarm_pso.py index 93bb983..75e8a27 100644 --- a/swarms_torch/multi_swarm_pso.py +++ b/swarms_torch/multi_swarm_pso.py @@ -1,4 +1,3 @@ -import torch import random import string @@ -77,7 +76,8 @@ def generate_random_string(self): """ return "".join( random.choice(string.ascii_lowercase + " ") - for _ in range(self.num_dimensions)) + for _ in range(self.num_dimensions) + ) def fitness_function(self, position): """Fitness function to be maximized""" @@ -95,19 +95,17 @@ def diversification_method(self, sub_swarms): def optimize(self): """Optimizes the fitness function""" - sub_swarms = [[ - self.generate_random_string() - for _ in range(self.num_particles_per_swarm) + sub_swarms = [ + [self.generate_random_string() for _ in range(self.num_particles_per_swarm)] + for _ in range(self.num_sub_swarms) ] - for _ in range(self.num_sub_swarms)] for iteration in range(self.max_iterations): for sub_swarm in sub_swarms: for particle in sub_swarm: fitness = self.fitness_function(particle) if fitness > 0: - index_to_change = random.randint( - 0, self.num_dimensions - 1) + index_to_change = random.randint(0, self.num_dimensions - 1) new_char = random.choice(string.ascii_lowercase + " ") new_position = list(particle) new_position[index_to_change] = new_char @@ -119,26 +117,34 @@ def optimize(self): global_best_fitness = max( self.fitness_function(particle) for sub_swarm in sub_swarms - for particle in sub_swarm) + for particle in sub_swarm + ) global_best_position = [ - particle for sub_swarm in sub_swarms for particle in sub_swarm + particle + for sub_swarm in sub_swarms + for particle in sub_swarm if self.fitness_function(particle) == global_best_fitness ][0] print( f"Iteration {iteration}: Global Best Fitness = {global_best_fitness}," - f" Global Best Position = {global_best_position}") + f" Global Best Position = {global_best_position}" + ) global_best_fitness = max( self.fitness_function(particle) for sub_swarm in sub_swarms - for particle in sub_swarm) + for particle in sub_swarm + ) global_best_position = [ - particle for sub_swarm in sub_swarms for particle in sub_swarm + particle + for sub_swarm in sub_swarms + for particle in sub_swarm if self.fitness_function(particle) == global_best_fitness ][0] print( f"Final Result: Global Best Fitness = {global_best_fitness}, Global Best" - f" Position = {global_best_position}") + f" Position = {global_best_position}" + ) # Example usage diff --git a/swarms_torch/multi_swarm_pso2.py b/swarms_torch/multi_swarm_pso2.py index f35b167..c58b207 100644 --- a/swarms_torch/multi_swarm_pso2.py +++ b/swarms_torch/multi_swarm_pso2.py @@ -2,7 +2,6 @@ class Particle: - def __init__(self, dim, minx, maxx): self.position = torch.rand(dim) * (maxx - minx) + minx self.velocity = torch.rand(dim) * (maxx - minx) + minx @@ -12,9 +11,11 @@ def __init__(self, dim, minx, maxx): def update_velocity(self, global_best, w=0.7, c1=1.5, c2=1.5): r1 = torch.rand(self.position.size()) r2 = torch.rand(self.position.size()) - self.velocity = (w * self.velocity + c1 * r1 * - (self.best_position - self.position) + c2 * r2 * - (global_best - self.position)) + self.velocity = ( + w * self.velocity + + c1 * r1 * (self.best_position - self.position) + + c2 * r2 * (global_best - self.position) + ) def update_position(self, minx, maxx): self.position += self.velocity @@ -22,11 +23,8 @@ def update_position(self, minx, maxx): class Swarm: - def __init__(self, num_particles, dim, minx, maxx): - self.particles = [ - Particle(dim, minx, maxx) for _ in range(num_particles) - ] + self.particles = [Particle(dim, minx, maxx) for _ in range(num_particles)] self.global_best = None self.global_best_score = float("inf") @@ -43,11 +41,8 @@ def move_particles(self, minx, maxx): class MultiSwarm: - def __init__(self, num_swarms, num_particles, dim, minx, maxx): - self.swarms = [ - Swarm(num_particles, dim, minx, maxx) for _ in range(num_swarms) - ] + self.swarms = [Swarm(num_particles, dim, minx, maxx) for _ in range(num_swarms)] self.minx = minx self.maxx = maxx @@ -61,7 +56,7 @@ def optimize(self, func, max_iter): def rosenbrock(x, a=1, b=100): - return (a - x[0])**2 + b * (x[1] - x[0]**2)**2 + return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2 # num_swarms = 5 diff --git a/swarms_torch/multi_swarm_pso_transformer.py b/swarms_torch/multi_swarm_pso_transformer.py index bc113bd..aeaa860 100644 --- a/swarms_torch/multi_swarm_pso_transformer.py +++ b/swarms_torch/multi_swarm_pso_transformer.py @@ -82,7 +82,6 @@ class Particle(nn.Module): - def __init__(self, input_dim, hidden_dim, output_dim): super(Particle, self).__init__() self.transformer = nn.Transformer(input_dim, hidden_dim) @@ -95,9 +94,9 @@ def forward(self, x): class MultiSwarmOptimizer: - - def __init__(self, particle, num_particles, num_subswarms, fitness_func, - bounds, num_epochs): + def __init__( + self, particle, num_particles, num_subswarms, fitness_func, bounds, num_epochs + ): self.particle = particle self.num_particles = num_particles self.num_subswarms = num_subswarms @@ -107,8 +106,7 @@ def __init__(self, particle, num_particles, num_subswarms, fitness_func, self.subswarms = [] for _ in range(num_subswarms): - self.subswarms.append( - [deepcopy(particle) for _ in range(num_particles)]) + self.subswarms.append([deepcopy(particle) for _ in range(num_particles)]) def optimize(self): for epoch in range(self.num_epochs): @@ -122,21 +120,25 @@ def optimize(self): best_particle = max(subswarm, key=lambda p: p.best_fitness) for particle in subswarm: particle.velocity = ( - particle.velocity + 0.5 * - (particle.best_position - particle.position) + 0.5 * - (best_particle.best_position - particle.position)) + particle.velocity + + 0.5 * (particle.best_position - particle.position) + + 0.5 * (best_particle.best_position - particle.position) + ) particle.position = particle.position + particle.velocity - particle.position = torch.clamp(particle.position, - *self.bounds) + particle.position = torch.clamp(particle.position, *self.bounds) - best_subswarm = max(self.subswarms, - key=lambda s: max(p.best_fitness for p in s)) + best_subswarm = max( + self.subswarms, key=lambda s: max(p.best_fitness for p in s) + ) best_particle = max(best_subswarm, key=lambda p: p.best_fitness) - print(f"Epoch {epoch+1}/{self.num_epochs}, Best Fitness:" - f" {best_particle.best_fitness}") - - best_subswarm = max(self.subswarms, - key=lambda s: max(p.best_fitness for p in s)) + print( + f"Epoch {epoch+1}/{self.num_epochs}, Best Fitness:" + f" {best_particle.best_fitness}" + ) + + best_subswarm = max( + self.subswarms, key=lambda s: max(p.best_fitness for p in s) + ) best_particle = max(best_subswarm, key=lambda p: p.best_fitness) return best_particle @@ -172,8 +174,9 @@ def fitness_func(particle): particle = Particle(input_dim, hidden_dim, output_dim) # Create the multi-swarm optimizer -optimizer = MultiSwarmOptimizer(particle, num_particles, num_subswarms, - fitness_func, bounds, num_epochs) +optimizer = MultiSwarmOptimizer( + particle, num_particles, num_subswarms, fitness_func, bounds, num_epochs +) # Run the optimization best_particle = optimizer.optimize() diff --git a/swarms_torch/neuronal_transformer.py b/swarms_torch/neuronal_transformer.py index 0faeb4f..28fc7fa 100644 --- a/swarms_torch/neuronal_transformer.py +++ b/swarms_torch/neuronal_transformer.py @@ -49,14 +49,12 @@ def forward(self, x): class Neuron(nn.Module): - def __init__(self, num_states): super(Neuron, self).__init__() self.states = nn.Parameter(torch.randn(num_states)) class SynapseTransformer(nn.Module): - def __init__(self, input_dim, output_dim, nhead: int): super(SynapseTransformer, self).__init__() self.transformer = TransformerLayer(input_dim, output_dim, nhead) @@ -154,12 +152,13 @@ def __init__(self, neuron_count, num_states, input_dim, output_dim, nhead): super(NNTransformer, self).__init__() # Initialize neurons and synapses - self.neurons = nn.ModuleList( - [Neuron(num_states) for _ in range(neuron_count)]) - self.synapses = nn.ModuleList([ - SynapseTransformer(input_dim, output_dim, nhead) - for _ in range(neuron_count) - ]) + self.neurons = nn.ModuleList([Neuron(num_states) for _ in range(neuron_count)]) + self.synapses = nn.ModuleList( + [ + SynapseTransformer(input_dim, output_dim, nhead) + for _ in range(neuron_count) + ] + ) self.norm = nn.LayerNorm(output_dim) self.softmax = nn.Softmax(dim=1) diff --git a/swarms_torch/particle_swarm.py b/swarms_torch/particle_swarm.py index 00e971f..fed29f2 100644 --- a/swarms_torch/particle_swarm.py +++ b/swarms_torch/particle_swarm.py @@ -78,12 +78,18 @@ def compute_fitness( ): return 1.0 / (1.0 + torch.norm((particle - self.goal).float())) - def update(self,): + def update( + self, + ): """Update the particles""" for i in range(self.n_particles): - fitness = self.compute_fitness(self.particles[i],) + fitness = self.compute_fitness( + self.particles[i], + ) - personal_best_fitness = self.compute_fitness(self.personal_best[i],) + personal_best_fitness = self.compute_fitness( + self.personal_best[i], + ) if fitness > personal_best_fitness: self.personal_best[i] = self.particles[i] @@ -94,16 +100,23 @@ def update(self,): self.global_best = self.particles[i] # update velocity - personal_attraction = (self.personal_best_weight * - torch.rand(self.goal.size()) * - (self.personal_best[i] - self.particles[i])) - - global_attraction = (self.global_best_weight * - torch.rand(self.goal.size()) * - (self.global_best - self.particles[i])) - - self.velocities[i] = (self.inertia * self.velocities[i] + - personal_attraction + global_attraction) + personal_attraction = ( + self.personal_best_weight + * torch.rand(self.goal.size()) + * (self.personal_best[i] - self.particles[i]) + ) + + global_attraction = ( + self.global_best_weight + * torch.rand(self.goal.size()) + * (self.global_best - self.particles[i]) + ) + + self.velocities[i] = ( + self.inertia * self.velocities[i] + + personal_attraction + + global_attraction + ) # Update position self.particles[i] += self.velocities[i].int() @@ -117,5 +130,4 @@ def optimize( for _ in range(iterations): self.update() best_particle = self.global_best - print("Best Particle: ", - "".join([chr(int(i)) for i in best_particle])) + print("Best Particle: ", "".join([chr(int(i)) for i in best_particle])) diff --git a/swarms_torch/queen_bee.py b/swarms_torch/queen_bee.py index 16440e8..4a4f8b6 100644 --- a/swarms_torch/queen_bee.py +++ b/swarms_torch/queen_bee.py @@ -113,9 +113,7 @@ def _evolve(self): # Display every generation if self.queen is not None: print("queen:") - print( - f"{self.decode(self.queen)} ({self.queen_fitness.item():.3f})\n" - ) + print(f"{self.decode(self.queen)} ({self.queen_fitness.item():.3f})\n") for gene, fitness in zip(self.pool, fitnesses): print(f"{self.decode(gene)} ({fitness.item():.3f})") @@ -132,41 +130,45 @@ def _evolve(self): self.queen_fitness, fitnesses = fitnesses[0], fitnesses[1:] # Deterministic tournament selection - contender_ids = torch.randn( - (self.pop_size - 1, self.pop_size - - 1)).argsort(dim=-1)[..., :self.num_tournament_participants] - participants, tournaments = self.pool[contender_ids], fitnesses[ - contender_ids] - top_winner = tournaments.topk(1, dim=-1, largest=True, - sorted=False).indices + contender_ids = torch.randn((self.pop_size - 1, self.pop_size - 1)).argsort( + dim=-1 + )[..., : self.num_tournament_participants] + participants, tournaments = self.pool[contender_ids], fitnesses[contender_ids] + top_winner = tournaments.topk(1, dim=-1, largest=True, sorted=False).indices top_winner = top_winner.unsqueeze(-1).expand(-1, -1, self.gene_length) parents = participants.gather(1, top_winner).squeeze(1) # Cross over all chosen drones with the queen - queen_parents = self.queen.unsqueeze(0).expand(self.pop_size - 1, - self.gene_length) + queen_parents = self.queen.unsqueeze(0).expand( + self.pop_size - 1, self.gene_length + ) self.pool = torch.cat( - (queen_parents[:, :self.gene_midpoint], - parents[:, self.gene_midpoint:]), + (queen_parents[:, : self.gene_midpoint], parents[:, self.gene_midpoint :]), dim=-1, ) # Mutate genes in population - mutate_mask = (torch.randn(self.pool.shape).argsort(dim=-1) < - self.num_code_mutate) + mutate_mask = ( + torch.randn(self.pool.shape).argsort(dim=-1) < self.num_code_mutate + ) noise = torch.randint(0, 2, self.pool.shape) * 2 - 1 mutated_pool = torch.where(mutate_mask, self.pool + noise, self.pool) - strong_mutate_mask = (torch.randn(self.pool.shape).argsort(dim=-1) < - self.strong_num_code_mutate) + strong_mutate_mask = ( + torch.randn(self.pool.shape).argsort(dim=-1) < self.strong_num_code_mutate + ) noise = torch.randint(0, 2, self.pool.shape) * 2 - 1 - strong_mutated_pool = torch.where(strong_mutate_mask, self.pool + noise, - self.pool) + strong_mutated_pool = torch.where( + strong_mutate_mask, self.pool + noise, self.pool + ) - strong_mutate_pool_mask = (torch.randn(self.pop_size - 1).argsort( - dim=-1) < self.strong_mutate_pool_size) - self.pool = torch.where(strong_mutate_pool_mask[:, None], - strong_mutated_pool, mutated_pool) + strong_mutate_pool_mask = ( + torch.randn(self.pop_size - 1).argsort(dim=-1) + < self.strong_mutate_pool_size + ) + self.pool = torch.where( + strong_mutate_pool_mask[:, None], strong_mutated_pool, mutated_pool + ) self.pool.clamp_(0, 255) def _check_convergence(self): diff --git a/swarms_torch/spiral_optimization.py b/swarms_torch/spiral_optimization.py index b8bc60b..5adc5a1 100644 --- a/swarms_torch/spiral_optimization.py +++ b/swarms_torch/spiral_optimization.py @@ -43,8 +43,9 @@ def __init__(self, goal: str = None, m: int = 10, k_max: int = 1000): - m: Number of search points (strings). - k_max: Maximum number of iterations. """ - self.goal = torch.tensor([ord(c) for c in goal], - dtype=torch.float32) # ASCII representation + self.goal = torch.tensor( + [ord(c) for c in goal], dtype=torch.float32 + ) # ASCII representation self.m = m self.k_max = k_max @@ -52,9 +53,7 @@ def __init__(self, goal: str = None, m: int = 10, k_max: int = 1000): # Initializing the search points and center randomly # Note: 32-126 is the ASCII range for all printable characters - self.points = torch.randint(32, - 127, (self.m, self.n_dim), - dtype=torch.float32) + self.points = torch.randint(32, 127, (self.m, self.n_dim), dtype=torch.float32) self.center = torch.randint(32, 127, (self.n_dim,), dtype=torch.float32) def _step_rate(self, k): @@ -74,7 +73,8 @@ def _update_points(self, k): R = torch.eye(self.n_dim) # Identity for simplicity in n-dimensions for i in range(self.m): self.points[i] = self.center + r * torch.mv( - R, (self.points[i] - self.center)) + R, (self.points[i] - self.center) + ) def _update_center(self): """Find the best search point and set as the new center.""" @@ -87,8 +87,9 @@ def optimize(self): for k in range(self.k_max): self._update_points(k) self._update_center() - if (torch.norm(self.center - self.goal) < - 1e-5): # Example convergence condition + if ( + torch.norm(self.center - self.goal) < 1e-5 + ): # Example convergence condition break def best_string(self): diff --git a/swarms_torch/swarmalators/swarmalator_base.py b/swarms_torch/swarmalators/swarmalator_base.py index 1c4a3ab..c320738 100644 --- a/swarms_torch/swarmalators/swarmalator_base.py +++ b/swarms_torch/swarmalators/swarmalator_base.py @@ -7,13 +7,11 @@ def pairwise_distances(x): return torch.sqrt((diff**2).sum(2)) -def function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, - R, D): +def function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D): dists = pairwise_distances(xi) mask = (dists < R).float() - torch.eye(N) - interaction_term = mask.unsqueeze(2) * (sigma_i.unsqueeze(0) - - sigma_i.unsqueeze(1)) + interaction_term = mask.unsqueeze(2) * (sigma_i.unsqueeze(0) - sigma_i.unsqueeze(1)) interaction_sum = interaction_term.sum(1) # Define dynamics for x based on our assumptions @@ -21,8 +19,9 @@ def function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, return dx -def function_for_sigma(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, - epsilon_r, R, D): +def function_for_sigma( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D +): dists = pairwise_distances(xi) mask = (dists < R).float() - torch.eye(N) @@ -30,22 +29,13 @@ def function_for_sigma(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, interaction_sum = interaction_term.sum(1) # Define dynamics for sigma based on our assumptions - d_sigma = gamma * interaction_sum + epsilon_a * sigma_i - epsilon_r * ( - sigma_i**3) + d_sigma = gamma * interaction_sum + epsilon_a * sigma_i - epsilon_r * (sigma_i**3) return d_sigma -def simulate_swarmalators(N, - J, - alpha, - beta, - gamma, - epsilon_a, - epsilon_r, - R, - D, - T=100, - dt=0.1): +def simulate_swarmalators( + N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=100, dt=0.1 +): xi = 2 * torch.rand(N, 3) - 1 sigma_i = torch.nn.functional.normalize(torch.randn(N, D), dim=1) @@ -54,10 +44,12 @@ def simulate_swarmalators(N, for t in range(T): for i in range(N): - dx = function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, - epsilon_a, epsilon_r, R, D) - d_sigma = function_for_sigma(xi, sigma_i, N, J, alpha, beta, gamma, - epsilon_a, epsilon_r, R, D) + dx = function_for_x( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) + d_sigma = function_for_sigma( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) # RK4 for xi k1_x = dt * dx @@ -87,8 +79,9 @@ def simulate_swarmalators(N, R, D, ) - k4_x = dt * function_for_x(xi + k3_x, sigma_i, N, J, alpha, beta, - gamma, epsilon_a, epsilon_r, R, D) + k4_x = dt * function_for_x( + xi + k3_x, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) xi = xi + (1 / 6) * (k1_x + 2 * k2_x + 2 * k3_x + k4_x) # RK4 for sigma_i @@ -132,8 +125,9 @@ def simulate_swarmalators(N, R, D, ) - sigma_i = sigma_i + (1 / 6) * (k1_sigma + 2 * k2_sigma + - 2 * k3_sigma + k4_sigma) + sigma_i = sigma_i + (1 / 6) * ( + k1_sigma + 2 * k2_sigma + 2 * k3_sigma + k4_sigma + ) sigma_i = torch.nn.functional.normalize(sigma_i, dim=1) results_xi.append(xi.clone()) diff --git a/swarms_torch/swarmalators/swarmalator_transformer.py b/swarms_torch/swarmalators/swarmalator_transformer.py index b74fead..57262f1 100644 --- a/swarms_torch/swarmalators/swarmalator_transformer.py +++ b/swarms_torch/swarmalators/swarmalator_transformer.py @@ -16,12 +16,7 @@ class SwarmalatorModel(nn.Module): print(positions, orientations) """ - def __init__(self, - N, - D, - nhead=8, - num_encoder_layers=6, - num_decoder_layers=6): + def __init__(self, N, D, nhead=8, num_encoder_layers=6, num_decoder_layers=6): super(SwarmalatorModel, self).__init__() self.N = N self.D = D @@ -32,19 +27,23 @@ def __init__(self, # Transformer encoder to process positions and orientations encoder_layer = nn.TransformerEncoderLayer(d_model=D, nhead=nhead) self.transformer_encoder = nn.TransformerEncoder( - encoder_layer, num_layers=num_encoder_layers) + encoder_layer, num_layers=num_encoder_layers + ) # Transformer decoder to produce updated positions and orientations decoder_layer = nn.TransformerDecoderLayer(d_model=D, nhead=nhead) self.transformer_decoder = nn.TransformerDecoder( - decoder_layer, num_layers=num_decoder_layers) + decoder_layer, num_layers=num_decoder_layers + ) def forward(self, src_mask=None, tgt_mask=None, memory_mask=None): # Using transformer encoder to get memory - position_memory = self.transformer_encoder(self.positions.unsqueeze(1), - mask=src_mask) + position_memory = self.transformer_encoder( + self.positions.unsqueeze(1), mask=src_mask + ) orientation_memory = self.transformer_encoder( - self.orientations.unsqueeze(1), mask=src_mask) + self.orientations.unsqueeze(1), mask=src_mask + ) # Using transformer decoder to get updated positions and orientations updated_positions = self.transformer_decoder( self.positions.unsqueeze(1), diff --git a/swarms_torch/swarmalators/swarmalator_visualize.py b/swarms_torch/swarmalators/swarmalator_visualize.py index 3c3ee27..2c6c4e3 100644 --- a/swarms_torch/swarmalators/swarmalator_visualize.py +++ b/swarms_torch/swarmalators/swarmalator_visualize.py @@ -6,8 +6,9 @@ N = 100 J, alpha, beta, gamma, epsilon_a, epsilon_r, R = [0.1] * 7 D = 3 # Ensure D is an integer -xi, sigma_i = simulate_swarmalators(N, J, alpha, beta, gamma, epsilon_a, - epsilon_r, R, D) +xi, sigma_i = simulate_swarmalators( + N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D +) print(xi[-1], sigma_i[-1]) @@ -32,11 +33,7 @@ def update(num): scatter._offsets3d = (x_data, y_data, z_data) return (scatter,) - ani = FuncAnimation(fig, - update, - frames=len(results_xi), - init_func=init, - blit=False) + FuncAnimation(fig, update, frames=len(results_xi), init_func=init, blit=False) plt.show() diff --git a/swarms_torch/transformer_pso.py b/swarms_torch/transformer_pso.py index f61cac7..6bed2fb 100644 --- a/swarms_torch/transformer_pso.py +++ b/swarms_torch/transformer_pso.py @@ -30,8 +30,7 @@ class Particle(nn.Module): def __init__(self, input_dim, d_model, nhead, num_layers, output_dim): super(Particle, self).__init__() self.embedding = nn.Embedding(input_dim, d_model) - self.transformer = nn.Transformer(d_model, nhead, num_layers, - num_layers) + self.transformer = nn.Transformer(d_model, nhead, num_layers, num_layers) self.fc = nn.Linear(d_model, output_dim) def forward(self, x): @@ -105,11 +104,9 @@ def __init__( self.global_best_weight = global_best_weight # Representing particles using model parameters - param_size = sum( - p.numel() for p in model_constructor(*model_args).parameters()) + param_size = sum(p.numel() for p in model_constructor(*model_args).parameters()) self.particles = [ - self.model_constructor(*model_args).to(device) - for _ in range(n_particles) + self.model_constructor(*model_args).to(device) for _ in range(n_particles) ] self.velocities = [ torch.zeros((param_size,)).to(device) for _ in range(n_particles) @@ -154,9 +151,9 @@ def update(self): delta = self.personal_best_weight * torch.rand_like(param) * ( self.personal_best[idx][name].to(self.device) - param.data ) + self.global_best_weight * torch.rand_like(param) * ( - self.global_best[name].to(self.device) - param.data) - self.velocities[ - idx] += self.inertia * self.velocities[idx] + delta + self.global_best[name].to(self.device) - param.data + ) + self.velocities[idx] += self.inertia * self.velocities[idx] + delta param.data += self.velocities[idx] def optimize(self, iterations=1000): @@ -164,8 +161,10 @@ def optimize(self, iterations=1000): for _ in range(iterations): self.update() best_particle_score = self.compute_fitness(self.global_best) - print(f"Iteration {_ + 1}/{iterations} - Best Particle Fitness:" - f" {best_particle_score}") + print( + f"Iteration {_ + 1}/{iterations} - Best Particle Fitness:" + f" {best_particle_score}" + ) def get_best_model(self): """Get the best model.""" diff --git a/tests/ant_colony.py b/tests/ant_colony.py index 78c0982..4a3dbd8 100644 --- a/tests/ant_colony.py +++ b/tests/ant_colony.py @@ -5,11 +5,10 @@ class TestAntColonyOptimization(unittest.TestCase): - def setUp(self): - self.aco = AntColonyOptimization(goal="Hello ACO", - num_ants=1000, - num_iterations=10) + self.aco = AntColonyOptimization( + goal="Hello ACO", num_ants=1000, num_iterations=10 + ) def test_initialization(self): self.assertEqual(self.aco.goal.tolist(), [ord(c) for c in "Hello ACO"]) @@ -17,16 +16,15 @@ def test_initialization(self): self.assertEqual(self.aco.pheromones.tolist(), [1.0] * 1000) def test_fitness(self): - solution = torch.tensor([ord(c) for c in "Hello ACO"], - dtype=torch.float32) - self.assertEqual(self.aco.fitness(solution).item(), - 0) # Should be maximum fitness + solution = torch.tensor([ord(c) for c in "Hello ACO"], dtype=torch.float32) + self.assertEqual( + self.aco.fitness(solution).item(), 0 + ) # Should be maximum fitness def test_update_pheromones(self): initial_pheromones = self.aco.pheromones.clone() self.aco.solutions = [ - torch.tensor([ord(c) - for c in "Hello ACO"], dtype=torch.float32) + torch.tensor([ord(c) for c in "Hello ACO"], dtype=torch.float32) for _ in range(1000) ] self.aco.update_pheromones() diff --git a/tests/fish_school.py b/tests/fish_school.py index 9ed0709..4ddd071 100644 --- a/tests/fish_school.py +++ b/tests/fish_school.py @@ -17,21 +17,13 @@ def test_fish_train(): def test_fishschool_initialization(): - fishschool = FishSchool(num_fish=10, - dim=512, - heads=8, - depth=6, - num_iter=100) + fishschool = FishSchool(num_fish=10, dim=512, heads=8, depth=6, num_iter=100) assert isinstance(fishschool, FishSchool) assert len(fishschool.fish) == 10 def test_fishschool_forward(): - fishschool = FishSchool(num_fish=10, - dim=512, - heads=8, - depth=6, - num_iter=100) + fishschool = FishSchool(num_fish=10, dim=512, heads=8, depth=6, num_iter=100) src = torch.randn(10, 32, 512) tgt = torch.randn(10, 32, 512) labels = torch.randint(0, 512, (10, 32)) diff --git a/tests/neuronal_transformer.py b/tests/neuronal_transformer.py index cb34d74..f906deb 100644 --- a/tests/neuronal_transformer.py +++ b/tests/neuronal_transformer.py @@ -26,38 +26,30 @@ def test_neuron_initialization(): def test_synapsetransformer_initialization(): - synapsetransformer = SynapseTransformer(input_dim=512, - output_dim=256, - nhead=8) + synapsetransformer = SynapseTransformer(input_dim=512, output_dim=256, nhead=8) assert isinstance(synapsetransformer, SynapseTransformer) def test_synapsetransformer_forward(): - synapsetransformer = SynapseTransformer(input_dim=512, - output_dim=256, - nhead=8) + synapsetransformer = SynapseTransformer(input_dim=512, output_dim=256, nhead=8) x = torch.randn(10, 32, 512) output = synapsetransformer(x) assert output.shape == torch.Size([10, 32, 256]) def test_nntransformer_initialization(): - nntransformer = NNTransformer(neuron_count=5, - num_states=10, - input_dim=512, - output_dim=256, - nhead=8) + nntransformer = NNTransformer( + neuron_count=5, num_states=10, input_dim=512, output_dim=256, nhead=8 + ) assert isinstance(nntransformer, NNTransformer) assert len(nntransformer.neurons) == 5 assert len(nntransformer.synapses) == 5 def test_nntransformer_forward(): - nntransformer = NNTransformer(neuron_count=5, - num_states=10, - input_dim=512, - output_dim=256, - nhead=8) + nntransformer = NNTransformer( + neuron_count=5, num_states=10, input_dim=512, output_dim=256, nhead=8 + ) x = torch.randn(1, 10) output = nntransformer(x) assert output.shape == torch.Size([10]) diff --git a/tests/particle_swarm.py b/tests/particle_swarm.py index 8450e11..b54ccdf 100644 --- a/tests/particle_swarm.py +++ b/tests/particle_swarm.py @@ -5,7 +5,6 @@ class TestParticleSwarmOptimization(unittest.TestCase): - def setUp(self): self.pso = ParticleSwarmOptimization(goal="Hello", n_particles=10) @@ -30,9 +29,11 @@ def test_optimize(self): self.pso.optimize(iterations=10) # After optimization, global best should be closer to the goal initial_distance = torch.norm( - (initial_best_particle - self.pso.goal).float()).item() + (initial_best_particle - self.pso.goal).float() + ).item() final_distance = torch.norm( - (self.pso.global_best - self.pso.goal).float()).item() + (self.pso.global_best - self.pso.goal).float() + ).item() self.assertLess(final_distance, initial_distance) diff --git a/tests/queen_bee.py b/tests/queen_bee.py index 4668c6d..74d9c8e 100644 --- a/tests/queen_bee.py +++ b/tests/queen_bee.py @@ -4,7 +4,6 @@ class TestQueenBeeGa(unittest.TestCase): - def setUp(self): self.optimizer = QueenBeeGa(goal="Hello QBGA", pop_size=50) diff --git a/tests/swarmalator_base.py b/tests/swarmalator_base.py index 82daf3f..2138295 100644 --- a/tests/swarmalator_base.py +++ b/tests/swarmalator_base.py @@ -50,24 +50,27 @@ def test_pairwise_distances_symmetry(): def test_function_for_x_shape(): xi = torch.randn(N, D) sigma_i = torch.randn(N, D) - dx = function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, - epsilon_r, R, D) + dx = function_for_x( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) assert dx.shape == (N, D) def test_function_for_x_output_range(): xi = torch.randn(N, D) sigma_i = torch.randn(N, D) - dx = function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, - epsilon_r, R, D) + dx = function_for_x( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) assert (dx >= -1.0).all() and (dx <= 1.0).all() def test_function_for_x_zero_at_equilibrium(): xi = torch.zeros(N, D) sigma_i = torch.zeros(N, D) - dx = function_for_x(xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, - epsilon_r, R, D) + dx = function_for_x( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) assert (dx == 0.0).all() @@ -77,24 +80,27 @@ def test_function_for_x_zero_at_equilibrium(): def test_function_for_sigma_shape(): xi = torch.randn(N, D) sigma_i = torch.randn(N, D) - d_sigma = function_for_sigma(xi, sigma_i, N, J, alpha, beta, gamma, - epsilon_a, epsilon_r, R, D) + d_sigma = function_for_sigma( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) assert d_sigma.shape == (N, D) def test_function_for_sigma_output_range(): xi = torch.randn(N, D) sigma_i = torch.randn(N, D) - d_sigma = function_for_sigma(xi, sigma_i, N, J, alpha, beta, gamma, - epsilon_a, epsilon_r, R, D) + d_sigma = function_for_sigma( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) assert (d_sigma >= -1.0).all() and (d_sigma <= 1.0).all() def test_function_for_sigma_zero_at_equilibrium(): xi = torch.zeros(N, D) sigma_i = torch.zeros(N, D) - d_sigma = function_for_sigma(xi, sigma_i, N, J, alpha, beta, gamma, - epsilon_a, epsilon_r, R, D) + d_sigma = function_for_sigma( + xi, sigma_i, N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D + ) assert (d_sigma == 0.0).all() @@ -102,17 +108,9 @@ def test_function_for_sigma_zero_at_equilibrium(): def test_simulate_swarmalators_output_shape(): - results_xi, results_sigma_i = simulate_swarmalators(N, - J, - alpha, - beta, - gamma, - epsilon_a, - epsilon_r, - R, - D, - T=T, - dt=dt) + results_xi, results_sigma_i = simulate_swarmalators( + N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt + ) assert len(results_xi) == T assert len(results_sigma_i) == T assert results_xi[0].shape == (N, D) @@ -120,22 +118,12 @@ def test_simulate_swarmalators_output_shape(): def test_simulate_swarmalators_convergence(): - results_xi, results_sigma_i = simulate_swarmalators(N, - J, - alpha, - beta, - gamma, - epsilon_a, - epsilon_r, - R, - D, - T=T, - dt=dt) + results_xi, results_sigma_i = simulate_swarmalators( + N, J, alpha, beta, gamma, epsilon_a, epsilon_r, R, D, T=T, dt=dt + ) for i in range(1, T): assert torch.allclose(results_xi[i], results_xi[i - 1], atol=1e-6) - assert torch.allclose(results_sigma_i[i], - results_sigma_i[i - 1], - atol=1e-6) + assert torch.allclose(results_sigma_i[i], results_sigma_i[i - 1], atol=1e-6) def test_simulate_swarmalators_non_zero_initial_condition(): diff --git a/tests/transformer_hive.py b/tests/transformer_hive.py new file mode 100644 index 0000000..6e1b99f --- /dev/null +++ b/tests/transformer_hive.py @@ -0,0 +1,79 @@ +import pytest +import torch +from swarms_torch.hivemind_swarm_transformer import HivemindSwarm + + +# Create a fixture for the HivemindSwarm model +@pytest.fixture +def swarm_model(): + return HivemindSwarm( + dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=3 + ) + + +# Test the basic functionality of HivemindSwarm +def test_hivemind_swarm_forward(swarm_model): + x = torch.randint(0, 20000, (1, 32)) + y = swarm_model(x) + assert y.shape == (1, 32, 512) + + +# Test if the swarm consists of the correct number of transformers +def test_num_transformers_in_swarm(swarm_model): + assert len(list(swarm_model.experts)) == 3 + + +# Test if the gate mechanism works as expected +def test_gate_mechanism(swarm_model): + x = torch.randint(0, 20000, (1, 32)) + outputs = torch.stack([expert(x) for expert in swarm_model.experts], dim=1) + gate = swarm_model.gate_activation( + swarm_model.gate_bias + swarm_model.gate(outputs) + ) + + # Check if the gate values sum to 1 along the transformer dimension + assert torch.allclose(gate.sum(dim=-1), torch.ones(1, 3)) + + +# Test if the model can handle different input shapes +def test_different_input_shapes(swarm_model): + x1 = torch.randint(0, 20000, (1, 32)) + x2 = torch.randint(0, 20000, (1, 16)) + y1 = swarm_model(x1) + y2 = swarm_model(x2) + assert y1.shape == (1, 32, 512) + assert y2.shape == (1, 16, 512) + + +# Test if the model can handle different numbers of models in the swarm +def test_different_num_models(): + swarm_model_1 = HivemindSwarm( + dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=1 + ) + swarm_model_2 = HivemindSwarm( + dim=512, max_seq_len=32, depth=6, heads=8, dim_head=64, num_models=5 + ) + + x = torch.randint(0, 20000, (1, 32)) + y1 = swarm_model_1(x) + y2 = swarm_model_2(x) + + assert y1.shape == (1, 32, 512) + assert y2.shape == (1, 32, 512) + + +# Test if the model works with different configurations +def test_different_configurations(): + model_1 = HivemindSwarm( + dim=256, max_seq_len=16, depth=4, heads=4, dim_head=64, num_models=2 + ) + model_2 = HivemindSwarm( + dim=1024, max_seq_len=64, depth=8, heads=16, dim_head=128, num_models=4 + ) + + x = torch.randint(0, 20000, (1, 16)) + y1 = model_1(x) + y2 = model_2(x) + + assert y1.shape == (1, 16, 256) + assert y2.shape == (1, 16, 1024) diff --git a/tests/transformer_pso.py b/tests/transformer_pso.py index 4d87893..caa53f9 100644 --- a/tests/transformer_pso.py +++ b/tests/transformer_pso.py @@ -7,20 +7,16 @@ def test_simpletransformer_initialization(): - simpletransformer = SimpleTransformer(input_dim=10, - d_model=512, - nhead=8, - num_layers=1, - output_dim=2) + simpletransformer = SimpleTransformer( + input_dim=10, d_model=512, nhead=8, num_layers=1, output_dim=2 + ) assert isinstance(simpletransformer, SimpleTransformer) def test_simpletransformer_forward(): - simpletransformer = SimpleTransformer(input_dim=10, - d_model=512, - nhead=8, - num_layers=1, - output_dim=2) + simpletransformer = SimpleTransformer( + input_dim=10, d_model=512, nhead=8, num_layers=1, output_dim=2 + ) x = torch.randint(0, 10, (10, 32)) output = simpletransformer(x) assert output.shape == torch.Size([32, 2]) @@ -35,8 +31,9 @@ def test_TransformerParticleSwarmOptimization_initialization(): [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)], batch_size=32, ) - pso = TransformerParticleSwarmOptimization(model_constructor, model_args, - device, criterion, data_loader) + pso = TransformerParticleSwarmOptimization( + model_constructor, model_args, device, criterion, data_loader + ) assert isinstance(pso, TransformerParticleSwarmOptimization) assert len(pso.particles) == 10 assert len(pso.velocities) == 10 @@ -52,8 +49,9 @@ def test_TransformerParticleSwarmOptimization_compute_fitness(): [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)], batch_size=32, ) - pso = TransformerParticleSwarmOptimization(model_constructor, model_args, - device, criterion, data_loader) + pso = TransformerParticleSwarmOptimization( + model_constructor, model_args, device, criterion, data_loader + ) fitness = pso.compute_fitness(pso.particles[0].state_dict()) assert isinstance(fitness, float) @@ -67,8 +65,9 @@ def test_TransformerParticleSwarmOptimization_update(): [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)], batch_size=32, ) - pso = TransformerParticleSwarmOptimization(model_constructor, model_args, - device, criterion, data_loader) + pso = TransformerParticleSwarmOptimization( + model_constructor, model_args, device, criterion, data_loader + ) pso.update() assert len(pso.particles) == 10 assert len(pso.velocities) == 10 @@ -84,8 +83,9 @@ def test_TransformerParticleSwarmOptimization_optimize(): [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)], batch_size=32, ) - pso = TransformerParticleSwarmOptimization(model_constructor, model_args, - device, criterion, data_loader) + pso = TransformerParticleSwarmOptimization( + model_constructor, model_args, device, criterion, data_loader + ) pso.optimize(iterations=10) assert len(pso.particles) == 10 assert len(pso.velocities) == 10 @@ -101,8 +101,9 @@ def test_TransformerParticleSwarmOptimization_get_best_model(): [(torch.randint(0, 10, (10,)), torch.tensor(1)) for _ in range(100)], batch_size=32, ) - pso = TransformerParticleSwarmOptimization(model_constructor, model_args, - device, criterion, data_loader) + pso = TransformerParticleSwarmOptimization( + model_constructor, model_args, device, criterion, data_loader + ) pso.optimize(iterations=10) best_model = pso.get_best_model() assert isinstance(best_model, SimpleTransformer)