diff --git a/compute5/NGS_Automated/copyProjectDataToPrm.sh b/compute5/NGS_Automated/copyProjectDataToPrm.sh index 7372878e..8878c993 100755 --- a/compute5/NGS_Automated/copyProjectDataToPrm.sh +++ b/compute5/NGS_Automated/copyProjectDataToPrm.sh @@ -35,7 +35,7 @@ echo "Logfiles will be written to $LOGDIR" for line in ${ARR[@]} do projectName=${line} - LOGGER=${LOGDIR}/${projectName}.copyProjectDataToPrm.logger + LOGGER=${LOGDIR}/${projectName}/${projectName}.copyProjectDataToPrm.logger FINISHED="no" @@ -49,7 +49,7 @@ do makeProjectDataDir=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${PROJECTSDIRPRM}/checkProjectData.sh ${PROJECTSDIRPRM} ${projectName}") copyProjectDataDiagnosticsClusterToPrm="${PROJECTSDIR}/${projectName}/* ${groupname}-dm@calculon.hpc.rug.nl:${PROJECTSDIRPRM}/${projectName}" - if [[ -f $LOGDIR/${projectName}/${projectName}.pipeline.finished && ! -f $LOGDIR/${projectName}/${projectName}.projectDataCopiedToPrm ]] + if [[ -d $LOGDIR/${projectName}/ && ! -f $LOGDIR/${projectName}/${projectName}.projectDataCopiedToPrm ]] then countFilesProjectDataDirTmp=$(ls -R ${PROJECTSDIR}/${projectName}/*/results/ | wc -l) module load hashdeep/4.4-foss-2015b @@ -76,19 +76,16 @@ do then echo "md5sum check failed, the copying will start again" >> ${LOGGER} rsync -r -av --exclude rawdata/ ${copyProjectDataDiagnosticsClusterToPrm} >> $LOGGER 2>&1 - echo "copy failed" >> $LOGDIR/${projectName}.copyProjectDataToPrm.failed + echo "copy failed" >> $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed elif [[ "${COPIEDTOPRM}" == *"PASS"* ]] then - touch $LOGDIR/${projectName}.projectDataCopiedToPrm + touch $LOGDIR/${projectName}/${projectName}.projectDataCopiedToPrm echo "finished copying project data to calculon" >> ${LOGGER} printf "De project data voor project ${projectName} is gekopieerd naar ${PROJECTSDIRPRM}" | mail -s "project data for project ${projectName} is copied to permanent storage" ${ONTVANGER} - mv $LOGDIR/${projectName}.projectDataCopiedToPrm $LOGDIR/${projectName}/ - mv $LOGDIR/${projectName}.copyProjectDataToPrm.logger $LOGDIR/${projectName}/ - - if [ -f $LOGDIR/${projectName}.copyProjectDataToPrm.failed ] + if [ -f $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed ] then - rm $LOGDIR/${projectName}.copyProjectDataToPrm.failed + rm $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed fi fi else @@ -98,9 +95,9 @@ do fi fi - if [ -f $LOGDIR/${projectName}.copyProjectDataToPrm.failed ] + if [ -f $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed ] then - COUNT=$(cat $LOGDIR/${projectName}.copyProjectDataToPrm.failed | wc -l) + COUNT=$(cat $LOGDIR/${projectName}/${projectName}.copyProjectDataToPrm.failed | wc -l) if [ $COUNT == 10 ] then HOSTNA=$(hostname) diff --git a/compute5/NGS_Automated/copyRawDataToDiagnosticsCluster.sh b/compute5/NGS_Automated/copyRawDataToDiagnosticsCluster.sh index 744d8637..0cf90cc1 100755 --- a/compute5/NGS_Automated/copyRawDataToDiagnosticsCluster.sh +++ b/compute5/NGS_Automated/copyRawDataToDiagnosticsCluster.sh @@ -29,19 +29,18 @@ for line in ${gattacaSamplesheets[@]} do csvFile=$(basename $line) filePrefix="${csvFile%.*}" - LOGGER=${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.logger + LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.logger - if [ -d ${LOGDIR}/${filePrefix}/ ] + if [ ! -d ${LOGDIR}/${filePrefix}/ ] then - echo "everything is finished of ${filePrefix}" - continue + mkdir ${LOGDIR}/${filePrefix}/ fi function finish { - if [ -f ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked ] + if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ] then echo "TRAPPED" - rm ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked + rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked fi } trap finish HUP INT QUIT TERM EXIT ERR @@ -62,24 +61,24 @@ do continue; fi - if [ -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster ] + if [ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster ] then continue; fi - if [ -f ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked ] + if [ -f ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ] then exit 0 fi - touch ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked + touch ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked ## Check if samplesheet is copied copyRawGatToDiagnosticsCluster="umcg-ateambot@${gattacaAddress}:${GATTACA}/runs/run_${run}_${sequencer}/results/${filePrefix}* ${RAWDATADIR}/$filePrefix" - if [[ ! -f ${SAMPLESHEETSDIR}/$csvFile || ! -f $LOGDIR/${filePrefix}.SampleSheetCopied ]] + if [[ ! -f ${SAMPLESHEETSDIR}/$csvFile || ! -f $LOGDIR/${filePrefix}/${filePrefix}.SampleSheetCopied ]] then scp umcg-ateambot@${gattacaAddress}:${GATTACA}/Samplesheets/${csvFile} ${SAMPLESHEETSDIR} - touch $LOGDIR/${filePrefix}.SampleSheetCopied + touch $LOGDIR/${filePrefix}/${filePrefix}.SampleSheetCopied fi ## Check if data is already copied to DiagnosticsCluster @@ -91,7 +90,7 @@ do fi - if [[ -d ${RAWDATADIR}/$filePrefix && ! -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster ]] + if [[ -d ${RAWDATADIR}/$filePrefix && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster ]] then ##Compare how many files are on both the servers in the directory countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l) @@ -110,7 +109,7 @@ do then echo "data copied to DiagnosticsCluster" >> $LOGGER printf ".. done \n" >> $LOGGER - touch $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster + touch $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster touch ${filePrefix}.md5sums.checked else echo "md5sum check failed, the copying will start again" >> $LOGGER @@ -124,7 +123,7 @@ do echo "data copied to DiagnosticsCluster" >> $LOGGER fi fi -rm ${LOGDIR}/${filePrefix}.copyToDiagnosticsCluster.locked +rm ${LOGDIR}/${filePrefix}/${filePrefix}.copyToDiagnosticsCluster.locked done trap - EXIT diff --git a/compute5/NGS_Automated/copyRawDataToPrm.sh b/compute5/NGS_Automated/copyRawDataToPrm.sh index 5abc3596..e5183c1a 100755 --- a/compute5/NGS_Automated/copyRawDataToPrm.sh +++ b/compute5/NGS_Automated/copyRawDataToPrm.sh @@ -35,7 +35,7 @@ for line in ${ARR[@]} do csvFile=$(basename $line) filePrefix="${csvFile%.*}" - LOGGER=${LOGDIR}/${filePrefix}.copyToPrm.logger + LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.copyToPrm.logger FINISHED="no" OLDIFS=$IFS @@ -45,11 +45,6 @@ do run=$3 IFS=$OLDIFS - if [ -d ${LOGDIR}/${filePrefix}/ ] - then - continue - fi - if [ -f ${LOGDIR}/copyDataToPrm.sh.locked ] then echo "copyToPrm is locked" @@ -61,12 +56,6 @@ do ##get header to decide later which column is project HEADER=$(head -1 ${line}) - if [ -d ${LOGDIR}/${filePrefix}/ ] - then - echo "(startPipeline) everything is finished of ${filePrefix}" - continue - fi - ##Remove header, only want to keep samples sed '1d' $line > ${LOGDIR}/TMP/${filePrefix}.utmp OLDIFS=$IFS @@ -96,7 +85,7 @@ do copyRawDiagnosticsClusterToPrm="${RAWDATADIR}/${filePrefix}/* ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}" makeRawDataDir=$(ssh ${groupname}-dm@calculon.hpc.rug.nl "sh ${RAWDATADIRPRM}/../checkRawData.sh ${RAWDATADIRPRM} ${filePrefix}") - if [[ -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}.dataCopiedToPrm ]] + if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToPrm ]] then countFilesRawDataDirTmp=$(ls ${RAWDATADIR}/${filePrefix}/${filePrefix}* | wc -l) if [ "${makeRawDataDir}" == "f" ] @@ -115,22 +104,20 @@ do then echo "md5sum check failed, the copying will start again" >> ${LOGGER} rsync -r -av ${copyRawDiagnosticsClusterToPrm} >> $LOGGER 2>&1 - echo "copy failed" >> $LOGDIR/${filePrefix}.failed + echo "copy failed" >> $LOGDIR/${filePrefix}/${filePrefix}.failed elif [[ "${COPIEDTOPRM}" == *"PASS"* ]] then scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${RAWDATADIRPRM}/${filePrefix}/ scp ${SAMPLESHEETSDIR}/${csvFile} ${groupname}-dm@calculon.hpc.rug.nl:${SAMPLESHEETSPRMDIR} echo "finished copying data to calculon" >> ${LOGGER} - - mkdir $LOGDIR/${filePrefix}/ - echo "Moving ${filePrefix} logfiles to $LOGDIR/${filePrefix}/ and removing tmp finished files" >> $LOGGER - rm $LOGDIR/${filePrefix}.SampleSheetCopied - rm $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster - mv $LOGDIR/${filePrefix}.copyToDiagnosticsCluster.logger $LOGDIR/${filePrefix}/ - mv $LOGDIR/${filePrefix}.copyToPrm.logger $LOGDIR/${filePrefix}/ - mv ${LOGDIR}/TMP/${filePrefix}.unique.projects $LOGDIR/${filePrefix}/projects.txt + #rm $LOGDIR/${filePrefix}.SampleSheetCopied + #rm $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster + #mv $LOGDIR/${filePrefix}.copyToDiagnosticsCluster.logger $LOGDIR/${filePrefix}/ + #mv $LOGDIR/${filePrefix}.copyToPrm.logger $LOGDIR/${filePrefix}/ + #mv ${LOGDIR}/TMP/${filePrefix}.unique.projects $LOGDIR/${filePrefix}/projects.txt echo "finished with rawdata" >> ${LOGDIR}/${filePrefix}/${filePrefix}.copyToPrm.logger + if ls ${RAWDATADIR}/${filePrefix}/${filePrefix}*.log 1> /dev/null 2>&1 then logFileStatistics=$(cat ${RAWDATADIR}/${filePrefix}/${filePrefix}*.log) @@ -140,9 +127,9 @@ do fi echo -e "De data voor project ${filePrefix} is gekopieerd naar ${RAWDATADIRPRM}" | mail -s "${filePrefix} copied to permanent storage" ${ONTVANGER} fi - if [ -f $LOGDIR/${filePrefix}.failed ] + if [ -f $LOGDIR/${filePrefix}/${filePrefix}.failed ] then - rm $LOGDIR/${filePrefix}.failed + rm $LOGDIR/${filePrefix}/${filePrefix}.failed fi fi else @@ -153,9 +140,9 @@ do fi fi - if [ -f $LOGDIR/${filePrefix}.failed ] + if [ -f $LOGDIR/${filePrefix}/${filePrefix}.failed ] then - COUNT=$(cat $LOGDIR/${filePrefix}.failed | wc -l) + COUNT=$(cat $LOGDIR/${filePrefix}/${filePrefix}.failed | wc -l) if [ $COUNT == 10 ] then HOSTNA=$(hostname) diff --git a/compute5/NGS_Automated/pipelineFinished.sh b/compute5/NGS_Automated/pipelineFinished.sh index dc04802a..abd8ff38 100644 --- a/compute5/NGS_Automated/pipelineFinished.sh +++ b/compute5/NGS_Automated/pipelineFinished.sh @@ -22,47 +22,28 @@ for i in ${ALLFINISHED[@]} do filename=$(basename $i) projectName="${filename%%.*}" - if [ ! -d ${LOGDIR}/${projectName}/ ] - then - mkdir -p ${LOGDIR}/${projectName}/ - for i in $(ls ${PROJECTSDIR}/${projectName}/*/rawdata/ngs/*); do if [ -L $i ];then readlink $i > ${LOGDIR}/${projectName}.rawdatalink ; fi;done + for i in $(ls ${PROJECTSDIR}/${projectName}/*/rawdata/ngs/*); do if [ -L $i ];then readlink $i > ${LOGDIR}/${projectName}/${projectName}.rawdatalink ; fi;done - while read line ; do dirname $line > ${LOGDIR}/${projectName}.rawdatalinkDirName; done<${LOGDIR}/${projectName}.rawdatalink + while read line ; do dirname $line > ${LOGDIR}/${projectName}/${projectName}.rawdatalinkDirName; done<${LOGDIR}/${projectName}/${projectName}.rawdatalink - rawDataName=$(while read line ; do basename $line ; done<${LOGDIR}/${projectName}.rawdatalinkDirName) - - if [ -f ${LOGDIR}/${rawDataName}.pipeline.logger ] - then - mv ${LOGDIR}/${rawDataName}.pipeline.logger ${LOGDIR}/${rawDataName}/ - fi - if [ -f ${LOGDIR}/${rawDataName}.scriptsGenerated ] - then - mv ${LOGDIR}/${rawDataName}.scriptsGenerated ${LOGDIR}/${rawDataName}/ - fi + rawDataName=$(while read line ; do basename $line ; done<${LOGDIR}/${projectName}/${projectName}.rawdatalinkDirName) echo "moving ${projectName} files to ${LOGDIR}/${projectName}/ and removing tmp finished files" - if [[ -f ${LOGDIR}/${projectName}.pipeline.logger && -f ${LOGDIR}/${projectName}.pipeline.started && -f ${LOGDIR}/${projectName}.rawdatalink && -f ${LOGDIR}/${projectName}.rawdatalinkDirName ]] + if [[ -f ${LOGDIR}/${projectName}/${projectName}.pipeline.logger && -f ${LOGDIR}/${projectName}/${projectName}.pipeline.started && -f ${LOGDIR}/${projectName}/${projectName}.rawdatalink && -f ${LOGDIR}/${projectName}/${projectName}.rawdatalinkDirName ]] then - mv ${LOGDIR}/${projectName}.pipeline.logger ${LOGDIR}/${projectName}/ - rm ${LOGDIR}/${projectName}.pipeline.started - rm ${LOGDIR}/${projectName}.rawdatalink - rm ${LOGDIR}/${projectName}.rawdatalinkDirName touch ${LOGDIR}/${projectName}/${rawDataName} + mv ${LOGDIR}/${projectName}.pipeline.finished ${LOGDIR}/${projectName}/ + + else + echo "there is/are missing some files:${projectName}.pipeline.logger or ${projectName}.pipeline.started or ${projectName}/${projectName}.rawdatalink or ${projectName}.rawdatalinkDirName" + echo "there is/are missing some files:${projectName}.pipeline.logger or ${projectName}.pipeline.started or ${projectName}/${projectName}.rawdatalink or ${projectName}.rawdatalinkDirName" >> ${LOGDIR}/${projectName}/${projectName}.pipeline.logger fi - if [ -f ${LOGDIR}/${projectName}.pipeline.failed ] - then - mv ${LOGDIR}/${projectName}.pipeline.failed ${LOGDIR}/${projectName}/ - fi - fi if [ ! -f ${LOGDIR}/${projectName}/${projectName}.pipeline.finished.mailed ] then printf "The results can be found: ${PROJECTSDIR}/${projectName} \n\nCheers from the GCC :)"| mail -s "NGS_DNA pipeline is finished for project ${projectName} on `date +%d/%m/%Y` `date +%H:%M`" ${ONTVANGER} touch ${LOGDIR}/${projectName}/${projectName}.pipeline.finished.mailed - rm ${LOGDIR}/${projectName}.pipeline.finished fi - - done diff --git a/compute5/NGS_Automated/startPipeline.sh b/compute5/NGS_Automated/startPipeline.sh index 98c4ffd0..2aa96872 100755 --- a/compute5/NGS_Automated/startPipeline.sh +++ b/compute5/NGS_Automated/startPipeline.sh @@ -32,12 +32,6 @@ do ##get header to decide later which column is project HEADER=$(head -1 ${i}) - if [ -d ${LOGDIR}/${filePrefix}/ ] - then - echo "(startPipeline) everything is finished of ${filePrefix}" - continue - fi - ##Remove header, only want to keep samples sed '1d' $i > ${LOGDIR}/TMP/${filePrefix}.tmp OLDIFS=$IFS @@ -77,7 +71,7 @@ do miSeqRun="no" while read line do - if [[ "${line}" == *"CARDIO"* || "${line}" == *"DER_v1"* || "${line}" == *"DYS_v3"* || "${line}" == *"EPI_v3"* || "${line}" == *"LEVER_v1"* || "${line}" == *"NEURO_v1"* || "${line}" == *"ONCO_v1"* || "${line}" == *"PCS_v1"* ]] + if [[ "${line}" == *"CARDIO_v"* || "${line}" == *"DER_v"* || "${line}" == *"DYS_v"* || "${line}" == *"EPI_v"* || "${line}" == *"LEVER_v"* || "${line}" == *"NEURO_v"* || "${line}" == *"ONCO_v"* || "${line}" == *"PCS_v"* ]] then miSeqRun="yes" break @@ -90,13 +84,13 @@ do sequencer=$2 run=$3 IFS=$OLDIFS - LOGGER=${LOGDIR}/${filePrefix}.pipeline.logger + LOGGER=${LOGDIR}/${filePrefix}/${filePrefix}.pipeline.logger #### ### Decide if the scripts should be created (per Samplesheet) ## # - if [[ -f $LOGDIR/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}.scriptsGenerated ]] + if [[ -f $LOGDIR/${filePrefix}/${filePrefix}.dataCopiedToDiagnosticsCluster && ! -f $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated ]] then ### Step 4: Does the pipeline need to run? if [ "${pipeline}" == "RNA-Lexogen-reverse" ] @@ -149,7 +143,7 @@ do cd scripts sh submit.sh - touch $LOGDIR/${filePrefix}.scriptsGenerated + touch $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated fi fi @@ -157,20 +151,25 @@ do ### If generatedscripts is already done, step in this part to submit the jobs (per project) ## # - if [ -f $LOGDIR/${filePrefix}.scriptsGenerated ] + if [ -f $LOGDIR/${filePrefix}/${filePrefix}.scriptsGenerated ] then for PROJECT in ${PROJECTARRAY[@]} do + if [ ! -d ${LOGDIR}/${PROJECT} ] + then + mkdir ${LOGDIR}/${PROJECT} + fi + WHOAMI=$(whoami) HOSTN=$(hostname) - LOGGER=${LOGDIR}/${PROJECT}.pipeline.logger - if [ ! -f ${LOGDIR}/${PROJECT}.pipeline.started ] + LOGGER=${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.logger + if [ ! -f ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.started ] then cd ${PROJECTSDIR}/${PROJECT}/run01/jobs/ sh submit.sh - touch ${LOGDIR}/${PROJECT}.pipeline.started - echo "${LOGDIR}/${PROJECT} started" >> $LOGGER + touch ${LOGDIR}/${PROJECT}/${PROJECT}.pipeline.started + echo "${PROJECT} started" >> $LOGGER printf "Pipeline: ${pipeline}\nStarttime:`date +%d/%m/%Y` `date +%H:%M`\nProject: $PROJECT\nStarted by: $WHOAMI\nHost: ${HOSTN}\n\nProgress can be followed via the command squeue -u $WHOAMI on $HOSTN.\nYou will receive an email when the pipeline is finished!\n\nCheers from the GCC :)" | mail -s "NGS_DNA pipeline is started for project $PROJECT on `date +%d/%m/%Y` `date +%H:%M`" ${ONTVANGER} sleep 40