-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add original code from encoded checkfiles
- Loading branch information
1 parent
415e7d9
commit dce695b
Showing
8 changed files
with
1,405 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,3 +99,11 @@ ENV/ | |
|
||
# mypy | ||
.mypy_cache/ | ||
|
||
|
||
# Added from encoded repo | ||
/bin/ | ||
/include/ | ||
/lib/ | ||
pip-selfcheck.json | ||
pyvenv.cfg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,24 @@ | ||
# checkfiles | ||
Files are checked to see if the MD5 sum (both for gzipped and ungzipped) is identical to the submitted metadata, as well as run through the validateFiles program from jksrc. | ||
Check Files | ||
=========== | ||
|
||
Files are checked to confirm that their MD5 sums (both gzipped and ungzipped) match the submitted metadata, and are also run through | ||
the validateFiles program from jksrc (http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/validateFiles). | ||
It operates on files in the 'uploading' state (according to the encodeD database) in the encode-files S3 bucket. | ||
Checkfiles is used by the ENCODE DCC to validate genomic datafiles submitted by labs. | ||
The bucket itself is mounted using Goofys (https://github.com/kahing/goofys). | ||
Errors are reported back to encodeD. | ||
|
||
Setup | ||
----- | ||
|
||
Install required packages for running deploy:: | ||
|
||
pyvenv . | ||
bin/pip install -r requirements-deploy.txt | ||
|
||
Deploy | ||
------ | ||
|
||
Supply arguments for checkfiles after a ``--`` separator:: | ||
|
||
bin/python deploy.py -- --username ACCESS_KEY_ID --password SECRET_ACCESS_KEY --bot-token SLACK-BOT-TOKEN https://www.encodeproject.org |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#cloud-config | ||
|
||
# Launch instance with network interfaces configured to the local IP addresses we reference in the config. | ||
# $ aws ec2 run-instances --user-data file://check-files.yml --iam-instance-profile Name="encoded-instance" --image-id ami-5a928a3b --region us-west-2 --security-groups ssh-http-https --instance-type c4.xlarge | ||
|
||
bootcmd: | ||
- cloud-init-per once ssh-users-ca echo "TrustedUserCAKeys /etc/ssh/users_ca.pub" >> /etc/ssh/sshd_config | ||
|
||
output: | ||
all: '| tee -a /var/log/cloud-init-output.log' | ||
|
||
packages: | ||
- fuse | ||
## - golang | ||
- git | ||
- awscli | ||
- curl | ||
- ntp | ||
- python3-dev | ||
- python3-venv | ||
|
||
## power_state: | ||
## mode: poweroff | ||
|
||
runcmd: | ||
|
||
- set -ex | ||
- systemctl daemon-reload # See https://bugs.launchpad.net/cloud-init/+bug/1449318 | ||
- mkdir -p /s3/encode-files | ||
- mkdir -p /s3/encoded-files-dev | ||
|
||
- curl -sS -L -o /usr/local/bin/goofys https://github.com/kahing/goofys/releases/download/v0.0.5/goofys | ||
- chmod +x /usr/local/bin/goofys | ||
|
||
## - mkdir -p /opt/goofys/bin | ||
## - chown -R build:build /opt/goofys | ||
## - sudo -u build GOPATH=/opt/goofys go get github.com/kahing/goofys | ||
## - mv /opt/goofys/bin/goofys /usr/local/bin/ | ||
|
||
- mount -a | ||
|
||
- mkdir /opt/encValData | ||
- chown build:build /opt/encValData | ||
- sudo -u build git clone --depth 1 https://github.com/ENCODE-DCC/encValData /opt/encValData | ||
|
||
- curl -sS -L -o /usr/local/bin/validateFiles http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/validateFiles | ||
- chmod +x /usr/local/bin/validateFiles | ||
|
||
- mkdir /opt/encoded | ||
- chown build:build /opt/encoded | ||
- sudo -u build git clone --no-checkout https://github.com/ENCODE-DCC/encoded.git /opt/encoded | ||
- sudo -u build git -C /opt/encoded checkout %(COMMIT)s | ||
- chmod +x /opt/encoded/checkfiles/script | ||
|
||
- cd /opt/encoded/checkfiles | ||
- sudo -u build pyvenv . | ||
- sudo -u build bin/pip install -r requirements.txt | ||
|
||
- cd /home/ubuntu | ||
- nohup /opt/encoded/checkfiles/script $(cat /opt/checkfiles_args.txt) 2> errors.log 1> output.log & | ||
|
||
users: | ||
- default | ||
- name: build | ||
gecos: Build user | ||
inactive: true | ||
system: true | ||
|
||
mounts: | ||
- [ "goofys-ulimit#encode-files", "/s3/encode-files", "fuse", "_netdev,allow_other,--file-mode=0444,--dir-mode=0555,--stat-cache-ttl=0", "0", "0" ] | ||
- [ "goofys-ulimit#encoded-files-dev", "/s3/encoded-files-dev", "fuse", "_netdev,allow_other,--file-mode=0444,--dir-mode=0555,--stat-cache-ttl=0", "0", "0" ] | ||
|
||
write_files: | ||
- path: /opt/checkfiles_args.txt | ||
content: | | ||
%(ARGS)s | ||
- path: /etc/ssh/users_ca.pub | ||
content: ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAv/ymOcnN4LhM4NACc3Or116XXJ6KytuOgB/+1qNkOFBqBosrn7cmJ35rsoNHRgYNrCsRE9ch74RKsN6H72FtSJgBhGh/9oUK7Os6Fqt3/ZZXxgxIx6ubs/MTgrxrAnujiBxUXMXQhLKMriNMpo8mt4nGYVtLk9PBjiyfncaS8H9ZKoNio9dhP8bmTuYvioAI35dqKdSlVLyzr/XkZxia8Ki+pQ0N6uuiEwMR3ToM+LSp8wpFOOAiu4PEAujRW7us/+1hlpKWfn0J7/V3826joHE+I967Vg/+ikcVhF77JjK1nib879VgCWfmn1HPQosIpk4yJfVgGvRVI7I2nfBPVw== [email protected] | ||
- path: /etc/systemd/system/cloud-final.service.d/override.conf | ||
content: | | ||
[Service] | ||
# See https://bugs.launchpad.net/cloud-init/+bug/1449318 | ||
KillMode=process | ||
- path: /usr/local/bin/goofys-ulimit | ||
permissions: 0755 | ||
content: | | ||
#!/bin/sh | ||
ulimit -n 60000 | ||
exec goofys "$@" | ||
- path: /etc/rc.local | ||
permissions: 0755 | ||
content: | | ||
#!/bin/sh -e | ||
cd /home/ubuntu; nohup /opt/encoded/checkfiles/script %(ARGS)s 2> errors.log 1> output.log & | ||
exit 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import boto3 | ||
import getpass | ||
import re | ||
import shlex | ||
import subprocess | ||
import sys | ||
|
||
# Block device mappings handed to EC2 as ``BlockDeviceMappings`` when the
# instance is created: a single gp2 EBS volume on /dev/sda1 that is deleted
# together with the instance.
_ROOT_VOLUME = {
    'VolumeSize': 512,
    'VolumeType': 'gp2',
    'DeleteOnTermination': True,
}

BDM = [{'DeviceName': '/dev/sda1', 'Ebs': _ROOT_VOLUME}]
|
||
|
||
def nameify(s):
    """Return *s* reduced to a hostname-safe label.

    The input is lower-cased, every run of characters outside ``[a-z0-9]``
    is collapsed to a single hyphen, and leading/trailing hyphens are
    removed.

    Only ASCII letters and digits are kept. ``str.isalnum`` would also
    accept non-ASCII alphanumerics (e.g. ``'é'``), which are not valid in
    the ``[a-z0-9-]`` names this helper is used to build and validate.
    """
    return re.sub(r'[^a-z0-9]+', '-', s.lower()).strip('-')
|
||
|
||
def run(image_id, instance_type,
        branch=None, name=None, profile_name=None, args=()):
    """Launch and tag an EC2 instance that runs checkfiles at a git commit.

    Resolves ``branch`` to a short commit hash, checks that the commit is
    contained in a remote-tracking branch, renders
    ``checkfiles/cloud-config.yml`` as stored at that commit into the
    instance user data, creates the instance, tags it and waits until it is
    running.

    Args:
        image_id: AMI id, passed to EC2 as ``ImageId``.
        instance_type: EC2 instance type, passed as ``InstanceType``.
        branch: Git branch or tag to deploy. Defaults to the branch that is
            currently checked out.
        name: Value of the instance ``Name`` tag. Defaults to a name built
            from the branch, commit and local username.
        profile_name: boto3 credentials profile. ``'production'`` also
            selects the ``production`` domain in the printed hostnames;
            anything else selects ``instance``.
        args: Arguments for checkfiles; each is shell-quoted and the result
            is substituted into the cloud-config template as ``ARGS``.

    Exits the process with status 1 when the commit is not in any remote
    branch, or when an instance with the same ``Name`` tag already exists in
    the pending, running, stopping or stopped state.
    """
    if branch is None:
        branch = subprocess.check_output(
            ['git', 'rev-parse', '--abbrev-ref', 'HEAD']
        ).decode('utf-8').strip()

    commit = subprocess.check_output(
        ['git', 'rev-parse', '--short', branch]).decode('utf-8').strip()

    # The instance clones the public repository and checks out this commit,
    # so refuse to deploy a commit no remote branch contains.
    if not subprocess.check_output(
            ['git', 'branch', '-r', '--contains', commit]).strip():
        print("Commit %r not in origin. Did you git push?" % commit)
        sys.exit(1)

    username = getpass.getuser()

    if name is None:
        name = nameify('checkfiles-%s-%s-%s' % (branch, commit, username))

    session = boto3.Session(region_name='us-west-2', profile_name=profile_name)
    ec2 = session.resource('ec2')

    domain = 'production' if profile_name == 'production' else 'instance'

    if any(ec2.instances.filter(
            Filters=[
                {'Name': 'tag:Name', 'Values': [name]},
                {'Name': 'instance-state-name',
                 'Values': ['pending', 'running', 'stopping', 'stopped']},
            ])):
        print('An instance already exists with name: %s' % name)
        sys.exit(1)

    # Use the template as committed, not the working-tree copy, so the user
    # data matches the code the instance will check out.
    user_data = subprocess.check_output(
        ['git', 'show', commit + ':checkfiles/cloud-config.yml']
    ).decode('utf-8')
    user_data = user_data % {
        'COMMIT': commit,
        'ARGS': ' '.join(shlex.quote(arg) for arg in args),
    }

    reservation = ec2.create_instances(
        MinCount=1,
        MaxCount=1,
        ImageId=image_id,
        InstanceType=instance_type,
        SecurityGroups=['ssh-http-https'],
        BlockDeviceMappings=BDM,
        UserData=user_data,
        InstanceInitiatedShutdownBehavior='terminate',
        IamInstanceProfile={'Name': 'encoded-instance'},
    )

    instance = reservation[0]  # Instance:i-34edd56f
    print('%s.%s.encodedcc.org' % (instance.instance_id, domain))
    instance.wait_until_exists()
    instance.create_tags(Tags=[
        {'Key': 'Name', 'Value': name},
        {'Key': 'branch', 'Value': branch},
        {'Key': 'commit', 'Value': commit},
        {'Key': 'started_by', 'Value': username},
    ])
    print('ssh %s.%s.encodedcc.org' % (name, domain))
    print('pending...')
    instance.wait_until_running()
    # The waiter does not refresh the resource's cached attributes; reload
    # them so the state printed below is the current one rather than the
    # state captured when the instance was created.
    instance.load()
    print(instance.state['Name'])
|
||
|
||
def main():
    """Parse the command line and pass the parsed options to ``run``."""
    import argparse

    def hostname(value):
        # A name is acceptable only when ``nameify`` leaves it unchanged.
        if value == nameify(value):
            return value
        raise argparse.ArgumentTypeError(
            "%r is an invalid hostname, only [a-z0-9] and hyphen allowed."
            % value)

    # (flags, keyword arguments) for each option, in registration order.
    option_specs = [
        (('-b', '--branch'),
         {'default': None, 'help': "Git branch or tag"}),
        (('-n', '--name'),
         {'type': hostname, 'help': "Instance name"}),
        (('--image-id',),
         {'default': 'ami-4b37d42b',
          'help': "ubuntu/images/hvm-ssd/"
                  "ubuntu-wily-15.10-amd64-server-20160217.1"}),
        (('--instance-type',),
         {'default': 'c4.xlarge',
          'help': "specify 'c4.8xlarge' if there are many files to check"}),
        (('--profile-name',),
         {'default': None, 'help': "AWS creds profile"}),
        (('args',),
         {'metavar': 'ARG', 'nargs': '*',
          'help': "arguments for checkfiles"}),
    ]

    cli = argparse.ArgumentParser(description="Deploy checkfiles on AWS")
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()
    return run(**vars(options))
|
||
|
||
# Deploy only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
boto3==1.2.4 | ||
botocore==1.3.28 | ||
docutils==0.12 | ||
jmespath==0.9.0 | ||
python-dateutil==2.4.2 | ||
six==1.10.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
requests==2.9.1 | ||
slackclient==1.0.6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/bin/bash
# Run checkfiles in an endless loop: one pass, a 480-minute pause, repeat.
# Every argument given to this script is forwarded to checkfiles.py as-is.
# Log files are created relative to the current working directory.
while true
do
    # Timestamp that gives each pass its own set of log files.
    stamp=$(date +"%Y%m%d%H%M%S")
    /opt/encoded/checkfiles/bin/python /opt/encoded/checkfiles/checkfiles.py "$@" \
        --out "${stamp}-checkfiles.log" \
        --err "${stamp}-checkfiles-error.log" \
        --include-unexpired-upload \
        2> "${stamp}-errors.log" 1> "${stamp}-output.log"
    sleep 480m
done