From c21a7bc08a5376e47c161822de0c23cc09b29c32 Mon Sep 17 00:00:00 2001 From: Tom Lin Date: Sun, 29 Sep 2024 03:16:07 +0100 Subject: [PATCH] Fix broken SSH key fixture Add partition and gres support Add stable id for users Add sudorule support --- README.md | 4 ++-- playbook-task-sync-nodes.yml | 11 ++++++++++- playbook-task-sync-users.yml | 27 +++++++++++++++++++++++++++ playbook-task-tests.yml | 2 +- staging.rb | 29 +++++++++++++++++++++++------ templates/slurm.nodes.conf.j2 | 8 +++++++- 6 files changed, 70 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d64d77b..7f46521 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ This section is written against a Fedora host environment, you may need to adjus ```shell pipx install ansible - pipx inject ansible passlib + pipx inject ansible passlib cryptography ``` See for more installation methods. @@ -128,7 +128,7 @@ ungrouped: unattended_security_update_interval: "Mon *-*-1..7 00:00:00" srun_port_range: 60001-65000 storage_pool: local-lvm - ssh_private_key: "" + test_ssh_private_key: "" arch_to_dns_map: x86_64: amd64 aarch64: arm64 diff --git a/playbook-task-sync-nodes.yml b/playbook-task-sync-nodes.yml index c6babce..0021679 100644 --- a/playbook-task-sync-nodes.yml +++ b/playbook-task-sync-nodes.yml @@ -17,6 +17,15 @@ firstmatch: yes line: "" + - name: Update slurm gres config + ansible.builtin.blockinfile: + path: /etc/slurm/gres.conf + block: | + AutoDetect=nvml + AutoDetect=rsmi + AutoDetect=nrt + AutoDetect=oneapi + - name: Update slurm stateless partition and node definitions ansible.builtin.blockinfile: path: /etc/slurm/slurm.conf @@ -29,7 +38,7 @@ ReturnToService=2 MaxNodeCount=65536 SrunPortRange={{srun_port_range}} - PartitionName=compute Default=YES MaxTime=2-00 State=UP Nodes=ALL + GresTypes=gpu {{ lookup('ansible.builtin.template', './slurm.nodes.conf.j2') }} - name: Restart slurmctld diff --git a/playbook-task-sync-users.yml b/playbook-task-sync-users.yml index e322274..07ed323 100644 --- a/playbook-task-sync-users.yml +++ b/playbook-task-sync-users.yml @@ -3,6 +3,19 @@ become: true tasks: + - name: Ensure IPA sudo group + freeipa.ansible_freeipa.ipagroup: + ipaadmin_password: "{{ipa_password}}" + name: sudo + + - name: Ensure IPA sudo rule + freeipa.ansible_freeipa.ipasudorule: + ipaadmin_password: "{{ipa_password}}" + name: sudo + group: sudo + cmdcat: all + hostcat: all + - name: Query uids command: ipa --no-prompt user-find --raw register: ipa_users @@ -35,4 +48,18 @@ last: "{{item.value.last}}" email: "{{item.value.email}}" sshpubkey: "{{item.value.publickey}}" + uid: "{{range(1000, 32768) | random(seed=item.key)}}" + gid: "{{range(1000, 32768) | random(seed=item.key)}}" + state: present + password: "" + update_password: on_create + loop: "{{users | dict2items}}" + + - name: Ensure user sudo rules + freeipa.ansible_freeipa.ipagroup: + ipaadmin_password: "{{ipa_password}}" + name: sudo + action: member + user: "{{item.key}}" loop: "{{users | dict2items}}" + when: item.value.group == "sudo" diff --git a/playbook-task-tests.yml b/playbook-task-tests.yml index 2bb1fcb..091d9c4 100644 --- a/playbook-task-tests.yml +++ b/playbook-task-tests.yml @@ -6,6 +6,6 @@ - name: Run tests vars: ansible_user: "{{item.key}}" - ansible_ssh_private_key_file: "{{ssh_private_key}}" + ansible_ssh_private_key_file: "{{test_ssh_private_key}}" include_tasks: tasks/tests.yml loop: "{{users | dict2items}}" diff --git a/staging.rb b/staging.rb index 0606a09..088cb67 100644 --- a/staging.rb +++ b/staging.rb @@ -45,7 +45,7 @@ def self.common_vars(storage_pool) unattended_security_update_interval: 'Mon *-*-1..7 00:00:00', srun_port_range: '60001-65000', storage_pool: storage_pool, - ssh_private_key: SSH_PRIVATE_KEY, + test_ssh_private_key: SSH_PRIVATE_KEY, delete_templates: true, arch_to_dns_map: { x86_64: 'amd64', @@ -59,7 +59,7 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash: raise "Public key file #{SSH_PUBLIC_KEY} not found" unless File.file?(SSH_PUBLIC_KEY) raise "Public key file #{SSH_PRIVATE_KEY} not found" unless File.file?(SSH_PRIVATE_KEY) - ssh_pub_keys = [File.read(SSH_PUBLIC_KEY).strip, File.read(SSH_PUBLIC_KEY).strip] + ssh_pub_keys = [File.read(SSH_PUBLIC_KEY).strip, "#{File.read(SSH_PUBLIC_KEY).strip}_copy"] test_password = 'vagrant0' # IPA needs >= 8 characters ssh_keys = ssh_pub_keys pve_vars = { @@ -142,6 +142,17 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash: } } + partitions = { + host: { + max_time: '1-00:00:00', + extra: 'Default=YES' + }, + arm: { + max_time: '0-03:00:00', + extra: '' + } + } + compute_nodes = { "compute0.#{DOMAIN}": { ip: '10.10.10.150', @@ -156,7 +167,8 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash: cores_per_socket: 4, pve_disk_size: '1G', pve_mem_gb: 10, # Otherwise iPXE runs out of memory decompressing initramfs - pve_ncores: 4 + pve_ncores: 4, + partition: 'host' }, "compute1.#{DOMAIN}": { ip: '10.10.10.151', @@ -171,12 +183,14 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash: cores_per_socket: 4, pve_disk_size: '1G', pve_mem_gb: 10, # Otherwise iPXE runs out of memory decompressing initramfs - pve_ncores: 4 + pve_ncores: 4, + partition: 'arm' } } common_vars = common_vars(storage_pool) extra_inventory = { + "root_sshkeys": ssh_pub_keys, "ungrouped": { "hosts": extra_hosts.merge( @@ -198,19 +212,22 @@ def self.write_inventory(pve_ip:, storage_pool:, extra_hosts:, host_common_hash: login_node_vars ).merge( "all_arch": %w[x86_64 aarch64], + "partitions": partitions, "nodes": compute_nodes, "users": { "foo": { "first": 'foo', "last": 'foo', "email": 'foo@example.com', - "publickey": ssh_pub_keys + "publickey": ssh_keys, + "group": '' }, "bar": { "first": 'bar', "last": 'bar', "email": 'bar@example.com', - "publickey": ssh_pub_keys + "publickey": ssh_keys, + "group": 'sudo' } } ) diff --git a/templates/slurm.nodes.conf.j2 b/templates/slurm.nodes.conf.j2 index 686cd4c..e21475b 100644 --- a/templates/slurm.nodes.conf.j2 +++ b/templates/slurm.nodes.conf.j2 @@ -1,5 +1,11 @@ +{% for part, vars in partitions.items() %} +{% set node_list = [] %} +{% for node, node_vars in nodes.items() if node_vars.partition == part %} +{% set _ = node_list.append(node) %} +{% endfor %} +PartitionName={{part}} State=UP MaxTime={{vars.max_time}} {{vars.extra}} Nodes={{node_list | join(',')}} +{% endfor %} {% for node, vars in nodes.items() %} NodeName={{node}} Sockets={{vars.sockets}} CoresPerSocket={{vars.cores_per_socket}} ThreadsPerCore={{vars.threads_per_core}} State=UNKNOWN {% endfor %} -