diff --git a/Configuration.default b/Configuration.default
index f7f74fd..e82c16f 100644
--- a/Configuration.default
+++ b/Configuration.default
@@ -1,71 +1,109 @@
-#!/bin/csh
+#!/bin/sh
 #
 # Configuration.default
 #
-# Source this file from any csh script to set the env variables
+# Source this file from any Bourne shell script to set the
+# env variables
 #
+#
+# If the MGICONFIG environment variable does not have a local override,
+# use the default "live" settings.
+#
+if [ "${MGICONFIG}" = "" ]
+then
+    MGICONFIG=/usr/local/mgi/live/mgiconfig
+    export MGICONFIG
+fi

-if ( ${?MGICONFIG} == 0 ) then
-    setenv MGICONFIG /usr/local/mgi/live/mgiconfig
-endif
-
-source ${MGICONFIG}/master.config.csh
+. ${MGICONFIG}/master.config.sh

 # install directory
-setenv CACHEINSTALLDIR ${DBUTILS}/snpcacheload
+CACHEINSTALLDIR=${DBUTILS}/snpcacheload

 # output dir
-setenv CACHEDIR /data/loads/mgi/snpcacheload
+CACHEDIR=/data/loads/mgi/snpcacheload

 # data directory
-setenv CACHEDATADIR ${CACHEDIR}/output
+CACHEDATADIR=${CACHEDIR}/output

 # logs directory
-setenv CACHELOGSDIR ${CACHEDIR}/logs
+CACHELOGSDIR=${CACHEDIR}/logs

-# full path to back-end snp database backup file.
-# this directory is on the unix server on which the sybase server resides
-# e.g. lindon when backing up PROD1_MGI..snp
-setenv SNP_BACKUP /extra1/sybase/snp.backup
+# directory in which to archive the contents of output and logs directories
+ARCHIVEDIR=${CACHEDIR}/archive

-# full path to SNP_BACKUP in order to load it from shire (weekly snpcacheload)
-setenv PRODSNP_BACKUP /net/mtdoom/vol/lindon${SNP_BACKUP}
+export CACHEINSTALLDIR CACHEDIR CACHEDATADIR CACHELOGSDIR ARCHIVEDIR
+
+#
+# settings for snpmarker_weekly.sh
+#
+# full path to PROD1_MGI..snp backup file on lindon
+SNP_BACKUP_LOCALPATH=/extra1/sybase/snp.backup
+
+export SNP_BACKUP_LOCALPATH
+
+# source this for file archiving and other utilities
+DLAJOBSTREAMFUNC=${DBUTILS}/dlautils/DLAJobStreamFunctions.sh
+
+export DLAJOBSTREAMFUNC

 # mgd refseq _LogicalDB_key
-setenv REFSEQ_LOGICALDB_KEY 27
+REFSEQ_LOGICALDB_KEY=27
 # mgd SNP_ConsensusSnp_Marker _MGIType_key
-setenv SNPMRKR_MGITYPE_KEY 32
+SNPMRKR_MGITYPE_KEY=32
 # mgd refSNP _LogicalDB_key
-setenv CS_LOGICALDB_KEY 73
+CS_LOGICALDB_KEY=73
 # mgd SNP_ConsensusSnp _LogicalDB_key
-setenv CS_MGITYPE_KEY 30
+CS_MGITYPE_KEY=30
+
+export REFSEQ_LOGICALDB_KEY SNPMRKR_MGITYPE_KEY CS_LOGICALDB_KEY CS_MGITYPE_KEY

 # mgd EntrezGene _LogicalDB_key
-setenv EG_LOGICALDB_KEY 55
+EG_LOGICALDB_KEY=55
 # mgd MRK_Marker _MGIType_key
-setenv MRKR_MGITYPE_KEY 2
+MRKR_MGITYPE_KEY=2
+
+# QTL MRK_Marker _Marker_Type_key
+MRKR_QTLTYPE_KEY=6
+
+export EG_LOGICALDB_KEY MRKR_MGITYPE_KEY MRKR_QTLTYPE_KEY

 # snp table and bcp file names
-setenv ACC_TABLE SNP_Accession
-setenv ACC_FILE ${ACC_TABLE}.bcp
-setenv TMP_FXN_TABLE TMP_SNP_Marker_Fxn
-setenv TMP_FXN_FILE ${TMP_FXN_TABLE}.bcp
-setenv SNP_MRK_TABLE SNP_ConsensusSnp_Marker
-setenv SNP_MRK_FILE ${SNP_MRK_TABLE}.bcp
-setenv SNP_MRK_WITHIN_FILE ${SNP_MRK_TABLE}_Within.bcp
-setenv MRKLOC_CACHETABLE MRK_Location_Cache
-setenv MRKLOC_CACHEFILE ${MRKLOC_CACHETABLE}.bcp
+ACC_TABLE=SNP_Accession
+ACC_FILE=${ACC_TABLE}.bcp
+
+export ACC_TABLE ACC_FILE
+
+TMP_FXN_TABLE=TMP_SNP_Marker_Fxn
+TMP_FXN_FILE=${TMP_FXN_TABLE}.bcp
+
+export TMP_FXN_TABLE TMP_FXN_FILE
+
+SNP_MRK_TABLE=SNP_ConsensusSnp_Marker
+SNP_MRK_FILE=${SNP_MRK_TABLE}.bcp
+SNP_MRK_WITHIN_FILE=${SNP_MRK_TABLE}_Within.bcp
+
+export SNP_MRK_TABLE SNP_MRK_FILE SNP_MRK_WITHIN_FILE
+
+MRKLOC_CACHETABLE=MRK_Location_Cache
+MRKLOC_CACHEFILE=${MRKLOC_CACHETABLE}.bcp
+
+export MRKLOC_CACHETABLE MRKLOC_CACHEFILE

 # max number of lines per bcp file to keep below 2G
-setenv MAX_BCP_LINES 25000000
+MAX_BCP_LINES=25000000

 # max number of ConsensusSnp keys in a snpmrkwithin.py query batch
-setenv MAX_QUERY_BATCH 100000
+MAX_QUERY_BATCH=100000
+
+export MAX_BCP_LINES MAX_QUERY_BATCH

 # Are dbSNP and MGI coordinates synchronized (same mouse genome build)?
-setenv IN_SYNC yes
+IN_SYNC=yes
+
+export IN_SYNC
diff --git a/HISTORY b/HISTORY
index 64e06e7..ab89d40 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,3 +1,47 @@
+TAG: snpcacheload-3-5-1-1
+DATE: 09/06/2006
+STAFF: sc
+RELEASE: MGI3.51, build 36
+CHANGES: merge build36 branch to trunk (note this is a second merge
+         of this branch to the trunk)
+
+TAG: snpcacheload-mgiconfig-7
+DATE: 09/06/2006
+STAFF: sc
+RELEASE: MGI3.51, build 36
+CHANGES:
+1) added system_docs
+
+TAG: snpcacheload-mgiconfig-6
+TAG: snpcacheload-mgiconfig-5
+DATE: 08/24/2006
+STAFF: sc
+RELEASE: MGI3.51, build 36
+CHANGES:
+1) corrected mgidbutilities config var name
+2) added sys.stdout.flush
+
+TAG: snpcacheload-mgiconfig-4
+DATE: 08/10/2006
+STAFF: sc
+RELEASE: MGI3.51, build 36
+CHANGES:
+1) removed snpmrklocation.py
+2) Configuration - moved SNP_BACKUP_REMOTEPATH to mgiconfig
+                 - renamed SNP_BACKUP to SNP_BACKUP_LOCALPATH
+                 - added MRKR_QTLTYPE_KEY
+3) snpmarker_weekly.sh - updated to use SNP_BACKUP_REMOTEPATH and
+   SNP_BACKUP_LOCALPATH
+4) snpmrkwithin.py - updated to not load distance relationships; only
+   update locus_region and determine 'within coordinates of'
+
+TAG: snpcacheload-mgiconfig-3
+DATE: 08/01/2006
+STAFF: sc
+RELEASE: MGI3.51, build 36
+CHANGES:
+1) converted to Bourne shell
+
 TAG: snpcacheload-3-5-0-4
 DATE: 07/01/2006
 STAFF: lec
diff --git a/Install b/Install
index 412d688..d24b47c 100755
--- a/Install
+++ b/Install
@@ -1,4 +1,4 @@
-#!/bin/csh
+#!/bin/sh

 #
 # Install
@@ -9,19 +9,26 @@
 # 2. ln -s snpcacheload-#-#-# snpcacheload
 # 3. Copy Configuration.default Configuration
 # 4. Edit Configuration
-#
+# 5. Install

-cd `dirname $0` && source ./Configuration
+cd `dirname $0` && . ./Configuration

-if ( ! -d ${CACHEDIR} ) then
+if [ ! -d ${CACHEDIR} ]
+then
     mkdir -p ${CACHEDIR}
-endif
+fi

-if ( ! -d ${CACHEDATADIR} ) then
+if [ ! -d ${CACHEDATADIR} ]
+then
     mkdir -p ${CACHEDATADIR}
-endif
+fi

-if ( ! -d ${CACHELOGSDIR} ) then
+if [ ! -d ${CACHELOGSDIR} ]
+then
     mkdir -p ${CACHELOGSDIR}
-endif
+fi
+
+if [ ! -d ${ARCHIVEDIR} ]
+then
+    mkdir -p ${ARCHIVEDIR}
+fi
diff --git a/snpmarker.py b/snpmarker.py
index 58611e8..f559a34 100755
--- a/snpmarker.py
+++ b/snpmarker.py
@@ -16,9 +16,11 @@
 # 2) bcp files
 # History
 #
-# 08/17/2005 sc
-#   - SNP (TR 1560)
-# 03/16/2006 sc convert to snp database
+# lec 06/30/2006 - modified for mgiconfig
+#
+# sc 03/16/2006 - convert to snp database
+#
+# sc 08/17/2005 - SNP (TR 1560)
 #
 # lec 06/30/2006 - modified for mgiconfig
 #
@@ -89,6 +91,7 @@ def initialize():
     # Effects: queries a database
     # Throws:  db.error, db.connection_exc
     print 'connecting to mgd and loading markerLookup...%s' % NL
+    sys.stdout.flush()
     # set up connection to the mgd database
     db.useOneConnection(1)
     db.set_sqlLogin(user, password, mgdServer, mgdDB)
@@ -108,8 +111,10 @@ def initialize():
         markerLookup[ r['egId'] ] = r['_Marker_key']

     print 'connecting to %s..%s ...%s' % (snpServer, snpDB, NL)
+    sys.stdout.flush()
     # set up connection the snp database
     db.useOneConnection(0)
+    db.useOneConnection(1)
     db.set_sqlLogin(user, password, snpServer, snpDB)

 def deleteAccessions():
@@ -119,6 +124,7 @@
     # Effects: queries a database, deletes records from a database
     # Throws:  db.error, db.connection_exc
     print 'deleting accessions ...%s' % NL
+    sys.stdout.flush()
     cmds = []

     cmds.append('select a._Accession_key ' + \
@@ -145,6 +151,7 @@ def getMaxAccessionKey():
     # current max(_Accession_key)
     global accKey
     print 'getting max snp accession key ...%s' % NL
+    sys.stdout.flush()
     cmds = []
     cmds.append('select max(_Accession_key) ' + \
         'from SNP_Accession')
@@ -162,6 +169,7 @@ def createBCP():
     print 'creating %s...%s' % (snpMrkrFile, mgi_utils.date())
     print 'and %s...%s%s' % (accFile, mgi_utils.date(), NL)
     print 'querying ... %s' % NL
+    sys.stdout.flush()
     cmds = []

     # get set of DP_SNP_Marker attributes into a temp table
@@ -186,6 +194,7 @@ def createBCP():
     results = db.sql(cmds, 'auto')

     print 'writing bcp file ...%s' % NL
+    sys.stdout.flush()
     # current primary key
     primaryKey = 0
     for r in results[1]:
@@ -272,6 +281,7 @@ def finalize():
 #
 print 'snpmarker.py start: %s' % mgi_utils.date()
+sys.stdout.flush()
 try:
     initialize()
     getMaxAccessionKey()
@@ -288,4 +298,5 @@ def finalize():
     sys.exit(message)

 print 'snpmarker.py end: %s' % mgi_utils.date()
+sys.stdout.flush()
diff --git a/snpmarker.sh b/snpmarker.sh
new file mode 100755
index 0000000..b58be9e
--- /dev/null
+++ b/snpmarker.sh
@@ -0,0 +1,357 @@
+#!/bin/sh
+
+#
+# Program: snpmarker.sh
+#
+# Purpose:
+#
+#     Script for loading the SNP_ConsensusSnp_Marker table
+#     with dbSNP-determined snp to marker associations
+#     and MGI-determined snp to marker associations.
+#
+# Usage: snpmarker.sh [-a] [-r] [-c] [-l]
+# where:
+#     -a is an option to archive the output directory
+#     -r is an option to archive the logs directory
+#     -c is an option to clean the output directory
+#     -l is an option to clean the logs directory
+#
+# History
+#
+# sc  07/27/2006 - converted to Bourne shell
+# lec 06/30/2006 - modified for mgiconfig
+# sc  03/2006    - convert to snp database; add load of snp MRK_Location_Cache
+# sc  01/2006    - process multiple snpmrkwithin.bcp files
+# dbm 09/28/2005 - Added snpmrklocus.py & snpmrkwithin.py
+# sc  08/17/2005 - created
+
+#
+# Establish bcp file delimiters
+#
+
+# bcp file row delimiter
+NL="\n"
+
+# bcp file column delimiter
+DL="|"
+
+#
+# Set up a log file for the shell script in case there is an error
+# during configuration and initialization.
+#
+cd `dirname $0`
+LOG=`pwd`/snpmarker.log
+rm -f ${LOG}
+
+#
+# Verify the argument(s) to the shell script.
+#
+ARC_OUTPUT=no
+ARC_LOGS=no
+CLEAN_OUTPUT=no
+CLEAN_LOGS=no
+
+usage="Usage: snpmarker.sh [-a] [-r] [-c] [-l]"
+
+#
+# report usage if there are unrecognized arguments
+#
+set -- `getopt arcl $*`
+if [ $? != 0 ]
+then
+    echo ${usage} | tee -a ${LOG}
+    exit 1
+fi
+
+#
+# determine which options are on the command line
+#
+for i in $*
+do
+    case $i in
+        -a) ARC_OUTPUT=yes; shift;;
+        -r) ARC_LOGS=yes; shift;;
+        -c) CLEAN_OUTPUT=yes; shift;;
+        -l) CLEAN_LOGS=yes; shift;;
+        --) shift; break;;
+    esac
+done
+
+#
+# Establish the configuration file name; if it is readable, source it
+#
+CONFIG_LOAD=`pwd`/Configuration
+if [ ! -r ${CONFIG_LOAD} ]
+then
+    echo "Cannot read configuration file: ${CONFIG_LOAD}" | tee -a ${LOG}
+    exit 1
+fi
+
+. ${CONFIG_LOAD}
+
+#
+# Source the DLA library functions.
+#
+if [ "${DLAJOBSTREAMFUNC}" != "" ]
+then
+    if [ -r ${DLAJOBSTREAMFUNC} ]
+    then
+        . ${DLAJOBSTREAMFUNC}
+    else
+        echo "Cannot source DLA functions script: ${DLAJOBSTREAMFUNC}" | tee -a ${LOG}
+        exit 1
+    fi
+else
+    echo "Environment variable DLAJOBSTREAMFUNC has not been defined." | tee -a ${LOG}
+    exit 1
+fi
+
+#
+# archive/clean the logs/output directories?
+#
+if [ ${ARC_OUTPUT} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "archiving output directory" | tee -a ${LOG}
+    createArchive ${ARCHIVEDIR}/output ${CACHEDATADIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "createArchive for output directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+if [ ${CLEAN_OUTPUT} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "cleaning output directory" | tee -a ${LOG}
+    cleanDir ${CACHEDATADIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "cleanDir for output directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+if [ ${ARC_LOGS} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "archiving logs directory" | tee -a ${LOG}
+    createArchive ${ARCHIVEDIR}/logs ${CACHELOGSDIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "createArchive for logs directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+if [ ${CLEAN_LOGS} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "cleaning logs directory" | tee -a ${LOG}
+    cleanDir ${CACHELOGSDIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "cleanDir for logs directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+date | tee -a ${LOG}
+
+#
+# Establish the load log now that we have archived/cleaned
+#
+LOAD_LOG=${CACHELOGSDIR}/`basename $0`.log
+date | tee -a ${LOAD_LOG}
+
+cd ${CACHEDATADIR}
+
+#
+# Allow bcp into database
+#
+date | tee -a ${LOAD_LOG}
+echo "Allow bcp into database" | tee -a ${LOAD_LOG}
+${MGI_DBUTILS}/bin/turnonbulkcopy.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} | tee -a ${LOAD_LOG}
+
+#
+# Load dbSNP marker relationships
+#
+
+# create bcp file
+echo "Calling snpmarker.py" | tee -a ${LOAD_LOG}
+${CACHEINSTALLDIR}/snpmarker.py | tee -a ${LOAD_LOG}
+STAT=$?
+if [ ${STAT} -ne 0 ]
+then
+    echo "snpmarker.py failed" | tee -a ${LOAD_LOG}
+    exit 1
+fi
+
+# SNP_MRK_TABLE truncate, drop indexes, bcp in, create indexes
+date | tee -a ${LOAD_LOG}
+echo "bcp in ${SNP_MRK_TABLE}" | tee -a ${LOAD_LOG}
+echo "" | tee -a ${LOAD_LOG}
+${MGI_DBUTILS}/bin/bcpin_withTruncateDropIndex.csh ${SNPBE_DBSCHEMADIR} ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${SNP_MRK_TABLE} ${CACHEDATADIR} ${SNP_MRK_FILE} ${DL} ${NL}
+STAT=$?
+if [ ${STAT} -ne 0 ]
+then
+    echo "${MGI_DBUTILS}/bin/bcpin_withTruncateDropIndex.csh failed" | tee -a ${LOAD_LOG}
+    exit 1
+fi
+
+# SNP_MRK_TABLE update statistics
+echo "" | tee -a ${LOAD_LOG}
+date | tee -a ${LOAD_LOG}
+echo "Update statistics on ${SNP_MRK_TABLE} table" | tee -a ${LOAD_LOG}
+${MGI_DBUTILS}/bin/updateStatistics.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${SNP_MRK_TABLE} | tee -a ${LOAD_LOG}
+
+#
+# bcp in ACC_TABLE, dropping/recreating indexes
+#
+
+# ACC_TABLE drop indexes
+echo "" | tee -a ${LOG}
+date | tee -a ${LOG}
+echo "Drop indexes on ${ACC_TABLE} table" | tee -a ${LOG}
+${SNPBE_DBSCHEMADIR}/index/${ACC_TABLE}_drop.object | tee -a ${LOG}
+
+# ACC_TABLE bcp in
+date | tee -a ${LOAD_LOG}
+echo "bcp in ${ACC_TABLE}" | tee -a ${LOAD_LOG}
+echo "" | tee -a ${LOAD_LOG}
+${MGI_DBUTILS}/bin/bcpin.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${ACC_TABLE} ${CACHEDATADIR} ${ACC_FILE} ${DL} ${NL}
+STAT=$?
+if [ ${STAT} -ne 0 ]
+then
+    echo "${MGI_DBUTILS}/bin/bcpin.csh failed" | tee -a ${LOAD_LOG}
+    exit 1
+fi
+
+# ACC_TABLE create indexes
+echo "" | tee -a ${LOG}
+date | tee -a ${LOG}
+echo "Create indexes on ${ACC_TABLE} table" | tee -a ${LOG}
+${SNPBE_DBSCHEMADIR}/index/${ACC_TABLE}_create.object | tee -a ${LOG}
+
+# ACC_TABLE update statistics
+echo "" | tee -a ${LOAD_LOG}
+date | tee -a ${LOAD_LOG}
+echo "Update statistics on ${ACC_TABLE} table" | tee -a ${LOAD_LOG}
+${MGI_DBUTILS}/bin/updateStatistics.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${ACC_TABLE} | tee -a ${LOAD_LOG}
+
+#
+# Load MGI snp/marker 'within coordinates of' relationships
+#
+# Only run the following steps if the dbSNP and MGI coordinates are
+# synchronized (same mouse genome build).
+#
+if [ ${IN_SYNC} = "yes" ]
+then
+
+    #
+    # load snp..MRK_Location_Cache
+    #
+
+    # bcp out MRKLOC_CACHETABLE
+    echo "" | tee -a ${LOAD_LOG}
+    date | tee -a ${LOAD_LOG}
+    echo "bcp out ${MRKLOC_CACHETABLE}" | tee -a ${LOAD_LOG}
+    ${MGI_DBUTILS}/bin/bcpout.csh ${MGD_DBSERVER} ${MGD_DBNAME} ${MRKLOC_CACHETABLE} ${CACHEDATADIR} ${MRKLOC_CACHEFILE} ${DL} ${NL} | tee -a ${LOAD_LOG}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "${MGI_DBUTILS}/bin/bcpout.csh failed" | tee -a ${LOAD_LOG}
+        exit 1
+    fi
+
+    # bcp in MRKLOC_CACHETABLE, truncating and dropping/recreating indexes
+    echo "" | tee -a ${LOAD_LOG}
+    date | tee -a ${LOAD_LOG}
+    echo "bcp in ${MRKLOC_CACHETABLE}" | tee -a ${LOAD_LOG}
+    ${MGI_DBUTILS}/bin/bcpin_withTruncateDropIndex.csh ${SNPBE_DBSCHEMADIR} ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${MRKLOC_CACHETABLE} ${CACHEDATADIR} ${MRKLOC_CACHEFILE} ${DL} ${NL}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "${MGI_DBUTILS}/bin/bcpin_withTruncateDropIndex.csh failed" | tee -a ${LOAD_LOG}
+        exit 1
+    fi
+
+    # update statistics
+    echo "updating statistics on ${MRKLOC_CACHETABLE}" | tee -a ${LOAD_LOG}
+    ${MGI_DBUTILS}/bin/updateStatistics.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${MRKLOC_CACHETABLE} | tee -a ${LOAD_LOG}
+
+    #
+    # Update dbSNP locus-region function class to upstream/downstream
+    #
+
+    echo "" | tee -a ${LOAD_LOG}
+    date | tee -a ${LOAD_LOG}
+    echo "Calling snpmrklocus.py" | tee -a ${LOAD_LOG}
+    ${CACHEINSTALLDIR}/snpmrklocus.py | tee -a ${LOAD_LOG}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "${CACHEINSTALLDIR}/snpmrklocus.py failed" | tee -a ${LOAD_LOG}
+        exit 1
+    fi
+
+    #
+    # load MGI snp to marker relationships
+    #
+
+    # create the bcp file(s)
+    echo "" | tee -a ${LOAD_LOG}
+    date | tee -a ${LOAD_LOG}
+    echo "Calling snpmrkwithin.py" | tee -a ${LOAD_LOG}
+    ${CACHEINSTALLDIR}/snpmrkwithin.py | tee -a ${LOAD_LOG}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "${CACHEINSTALLDIR}/snpmrkwithin.py failed" | tee -a ${LOAD_LOG}
+        exit 1
+    fi
+
+    # SNP_MRK_TABLE drop indexes
+    echo "" | tee -a ${LOG}
+    date | tee -a ${LOG}
+    echo "Drop indexes on ${SNP_MRK_TABLE} table" | tee -a ${LOG}
+    ${SNPBE_DBSCHEMADIR}/index/${SNP_MRK_TABLE}_drop.object | tee -a ${LOG}
+    echo "" | tee -a ${LOG}
+
+    # SNP_MRK_TABLE bcp in each file
+    cd ${CACHEDATADIR}
+    for i in `ls ${SNP_MRK_WITHIN_FILE}*`
+    do
+        date | tee -a ${LOAD_LOG}
+        echo "Load ${i} into ${SNP_MRK_TABLE} table" | tee -a ${LOAD_LOG}
+        echo "" | tee -a ${LOAD_LOG}
+        ${MGI_DBUTILS}/bin/bcpin.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${SNP_MRK_TABLE} ${CACHEDATADIR} ${i} ${DL} ${NL}
+        STAT=$?
+        if [ ${STAT} -ne 0 ]
+        then
+            echo "${MGI_DBUTILS}/bin/bcpin.csh failed" | tee -a ${LOAD_LOG}
+            exit 1
+        fi
+    done
+
+    # SNP_MRK_TABLE create indexes
+    echo "" | tee -a ${LOG}
+    date | tee -a ${LOG}
+    echo "Create indexes on ${SNP_MRK_TABLE} table" | tee -a ${LOG}
+    ${SNPBE_DBSCHEMADIR}/index/${SNP_MRK_TABLE}_create.object | tee -a ${LOG}
+
+    # SNP_MRK_TABLE update statistics
+    echo "" | tee -a ${LOAD_LOG}
+    date | tee -a ${LOAD_LOG}
+    echo "Update statistics on ${SNP_MRK_TABLE} table" | tee -a ${LOAD_LOG}
+    ${MGI_DBUTILS}/bin/updateStatistics.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${SNP_MRK_TABLE} | tee -a ${LOAD_LOG}
+
+fi
+
+echo "" | tee -a ${LOAD_LOG}
+date | tee -a ${LOAD_LOG}
diff --git a/snpmarker_weekly.sh b/snpmarker_weekly.sh
new file mode 100755
index 0000000..8d66392
--- /dev/null
+++ b/snpmarker_weekly.sh
@@ -0,0 +1,199 @@
+#!/bin/sh -x
+
+#
+# Program: snpmarker_weekly.sh
+#
+# Purpose:
+#     1) run snpmarker.sh to create new snp/marker associations in
+#        a backend snp database
+#     2) create a backup of the source snp db
+#     3) load the backup into the destination snp db
+#
+# Usage: snpmarker_weekly.sh [-a] [-r] [-c] [-l]
+# where:
+#     -a is an option to archive the output directory
+#     -r is an option to archive the logs directory
+#     -c is an option to clean the output directory
+#     -l is an option to clean the logs directory
+#
+# History
+#
+# lec 06/30/2006 - modified for mgiconfig
+#
+# sc 05/01/2006 - updated to Bourne shell
+#     - added optional archiving and directory cleaning
+#       for logs and output dirs
+#     - updated to use new bcpin_withTruncateDropIndex.csh
+# sc 03/27/2006 - created
+
+#
+# Set up a log file for the shell script in case there is an error
+# during configuration and initialization.
+#
+cd `dirname $0`
+LOG=`pwd`/`basename $0`.log
+rm -f ${LOG}
+
+date | tee -a ${LOG}
+
+#
+# Verify the argument(s) to the shell script.
+#
+ARC_OUTPUT=no
+ARC_LOGS=no
+CLEAN_OUTPUT=no
+CLEAN_LOGS=no
+
+usage="Usage: snpmarker_weekly.sh [-a] [-r] [-c] [-l]"
+
+#
+# report usage if there are unrecognized arguments
+#
+set -- `getopt arcl $*`
+if [ $? != 0 ]
+then
+    echo ${usage} | tee -a ${LOG}
+    exit 1
+fi
+
+#
+# determine which options are on the command line
+#
+for i in $*
+do
+    case $i in
+        -a) ARC_OUTPUT=yes; shift;;
+        -r) ARC_LOGS=yes; shift;;
+        -c) CLEAN_OUTPUT=yes; shift;;
+        -l) CLEAN_LOGS=yes; shift;;
+        --) shift; break;;
+    esac
+done
+
+#
+# Establish the configuration file name; if it is readable, source it
+#
+CONFIG_LOAD=`pwd`/Configuration
+if [ ! -r ${CONFIG_LOAD} ]
+then
+    echo "Cannot read configuration file: ${CONFIG_LOAD}" | tee -a ${LOG}
+    exit 1
+fi
+
+. ${CONFIG_LOAD}
+
+#
+# Source the DLA library functions.
+#
+if [ "${DLAJOBSTREAMFUNC}" != "" ]
+then
+    if [ -r ${DLAJOBSTREAMFUNC} ]
+    then
+        . ${DLAJOBSTREAMFUNC}
+    else
+        echo "Cannot source DLA functions script: ${DLAJOBSTREAMFUNC}" | tee -a ${LOG}
+        exit 1
+    fi
+else
+    echo "Environment variable DLAJOBSTREAMFUNC has not been defined." | tee -a ${LOG}
+    exit 1
+fi
+
+#
+# archive/clean the logs and/or output directories?
+#
+if [ ${ARC_OUTPUT} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "archiving output directory" | tee -a ${LOG}
+    createArchive ${ARCHIVEDIR}/output ${CACHEDATADIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "createArchive for output directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+if [ ${CLEAN_OUTPUT} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "cleaning output directory" | tee -a ${LOG}
+    cleanDir ${CACHEDATADIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "cleanDir for output directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+if [ ${ARC_LOGS} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "archiving logs directory" | tee -a ${LOG}
+    createArchive ${ARCHIVEDIR}/logs ${CACHELOGSDIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "createArchive for logs directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+if [ ${CLEAN_LOGS} = "yes" ]
+then
+    date | tee -a ${LOG}
+    echo "cleaning logs directory" | tee -a ${LOG}
+    cleanDir ${CACHELOGSDIR}
+    STAT=$?
+    if [ ${STAT} -ne 0 ]
+    then
+        echo "cleanDir for logs directory failed" | tee -a ${LOG}
+        exit 1
+    fi
+fi
+
+date | tee -a ${LOG}
+
+#
+# Establish the load log now that we have archived/cleaned
+#
+LOAD_LOG=${CACHELOGSDIR}/`basename $0`.log
+date | tee -a ${LOAD_LOG}
+
+cd ${CACHEDATADIR}
+
+#
+# run snpmarker.sh with no archive/clean to load the source db
+#
+
+date | tee -a ${LOAD_LOG}
+echo "Calling ${CACHEINSTALLDIR}/snpmarker.sh" | tee -a ${LOAD_LOG}
+${CACHEINSTALLDIR}/snpmarker.sh
+STAT=$?
+if [ ${STAT} -ne 0 ]
+then
+    echo "${CACHEINSTALLDIR}/snpmarker.sh failed" | tee -a ${LOAD_LOG}
+    exit 1
+fi
+
+#
+# backup back-end production snp database
+#
+echo "" | tee -a ${LOAD_LOG}
+date | tee -a ${LOAD_LOG}
+echo "Backing up ${SNPBE_DBSERVER}..${SNPBE_DBNAME}"
+${MGI_DBUTILS}/bin/dump_db.csh ${SNPBE_DBSERVER} ${SNPBE_DBNAME} ${SNP_BACKUP_LOCALPATH} | tee -a ${LOAD_LOG}
+
+#
+# load front-end snp database
+#
+echo "" | tee -a ${LOG}
+date | tee -a ${LOG}
+echo "Loading ${SNP_DBSERVER}..${SNP_DBNAME}"
+${MGI_DBUTILS}/bin/load_db.csh ${SNP_DBSERVER} ${SNP_DBNAME} ${SNP_BACKUP_REMOTEPATH} | tee -a ${LOAD_LOG}
+
+echo "" | tee -a ${LOAD_LOG}
+date | tee -a ${LOAD_LOG}
+
diff --git a/snpmrklocation.py b/snpmrklocation.py
deleted file mode 100755
index 1a4e1be..0000000
--- a/snpmrklocation.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/local/bin/python
-
-'''
-#
-# Purpose:
-#
-# Create bcp files for snp..MRK_Location_Cache
-#
-# Usage:
-#     snpmrklocation.py
-#
-# History
-#
-# 03/14/2006 sc created
-#
-'''
-
-import sys
-import os
-import db
-import mgi_utils
-import string
-import accessionlib
-
-NL = '\n'
-DL = '|'
-TAB = '\t'
-outputdir = os.environ['CACHEDATADIR']
-cacheTable = os.environ['MRKLOC_CACHETABLE']
-
-bcpFile = open('%s/%s.bcp' % (outputdir, cacheTable), 'w')
-
-def setup():
-    # set up connection to the mgd database
-    server = os.environ['MGD_DBSERVER']
-    mgdDB = os.environ['MGD_DBNAME']
-    user = os.environ['MGD_DBUSER']
-    password = string.strip(open(os.environ['MGD_DBPASSWORDFILE'], 'r').readline())
-    db.set_sqlLogin(user, password, server, mgdDB)
-
-def createBCP():
-    print 'Creating %s/%s.bcp' % (outputdir, cacheTable)
-    cmds = []
-    cmds.append('select * ' + \
-        'from MRK_Location_Cache')
-    results = db.sql(cmds, 'auto')
-    for r in results[0]:
-        markerKey = r['_Marker_key']
-        chromosome = r['chromosome']
-        sequenceNum = r['sequenceNum']
-        cytoOffset = r['cytogeneticOffset']
-        if cytoOffset == None:
-            cytoOffset = ''
-        offset = r['offset']
-        startCoord = r['startCoordinate']
-        if startCoord == None:
-            startCoord = ''
-        endCoord = r['endCoordinate']
-        if endCoord == None:
-            endCoord = ''
-        strand = r['strand']
-        if strand == None:
-            strand = ''
-        mapUnits = r['mapUnits']
-        if mapUnits == None:
-            mapUnits = ''
-        provider = r['provider']
-        if provider == None:
-            provider = ''
-        version = r['version']
-        if version == None:
-            version = ''
-        bcpFile.write(str(markerKey) + DL + \
-            str(chromosome) + DL + \
-            str(sequenceNum) + DL + \
-            str(cytoOffset) + DL + \
-            str(offset) + DL + \
-            str(startCoord) + DL + \
-            str(endCoord) + DL + \
-            str(strand) + DL + \
-            str(mapUnits) + DL + \
-            str(provider) + DL + \
-            str(version) + NL)
-    bcpFile.close()
-
-#
-# Main Routine
-#
-
-print '%s' % mgi_utils.date()
-setup()
-createBCP()
-print '%s' % mgi_utils.date()
-
diff --git a/snpmrkwithin.py b/snpmrkwithin.py
index ed56d93..30e9909 100755
--- a/snpmrkwithin.py
+++ b/snpmrkwithin.py
@@ -90,8 +90,11 @@
 # max number of lines per bcp file to avoid file > 2Gb
 MAX_BCP_LINES = string.atoi(os.environ['MAX_BCP_LINES'])

+# QTL Marker Type Key
+MRKR_QTLTYPE_KEY = string.atoi(os.environ['MRKR_QTLTYPE_KEY'])
+
 #
-# globals
+# GLOBALS
 #

 # number suffix for the current bcp file name
@@ -196,53 +199,15 @@ def initialize():
         'from SNP_ConsensusSnp_Marker', 'auto')

     primaryKey = results[0]['key']
-    print 'primaryKey: %s' % primaryKey
-    openBCPFile()
-
-    return
-
-# Purpose: Creates a new bcp file pointer.  Uses a counter to
-#          create a unique name
-# Returns: Nothing
-# Assumes: Nothing
-# Effects: Exits if can't open the new bcp file
-# Throws: Nothing
-
-def openBCPFile():
-    global fpSnpMrk
-    global snpMrkFileCtr
-    #global snpMrkFile
-
-    # append this to next bcp filename
-    snpMrkFileCtr = snpMrkFileCtr + 1
-    try:
-        fpSnpMrk = open("%s%s" % (snpMrkFile, snpMrkFileCtr),'w')
-    except:
-        sys.stderr.write('Could not open bcp file: %s\n' % snpMrkFile)
+    print 'primaryKey: %s' % primaryKey
+    if primaryKey == None:
+        sys.stderr.write('SNP_ConsensusSnp_Marker table is ' + \
+            'empty; load dbSNP Marker associations first')
         sys.exit(1)
+    openBCPFile()

     return

-# Purpose: Perform cleanup steps for the script.
-# Returns: Nothing
-# Assumes: Nothing
-# Effects: Nothing
-# Throws: Nothing
-
-def finalize():
-    global fpSnpMrk
-
-    db.useOneConnection(0)
-
-    #
-    # Close the bcp file.
-    #
-    fpSnpMrk.close()
-
-    return
-
-
 # Purpose: Create a bcp file with annotations for SNP/marker pairs where
 #          the SNP is within 1000 kb of the marker and there is no existing
 #          annotation for the SNP/marker.
@@ -275,6 +240,45 @@ def process():

     return

+# Purpose: Perform cleanup steps for the script.
+# Returns: Nothing
+# Assumes: Nothing
+# Effects: Nothing
+# Throws: Nothing
+
+def finalize():
+    global fpSnpMrk
+
+    db.useOneConnection(0)
+
+    #
+    # Close the bcp file.
+    #
+    fpSnpMrk.close()
+
+    return
+
+# Purpose: Creates a new bcp file pointer.  Uses a counter to
+#          create a unique name
+# Returns: Nothing
+# Assumes: Nothing
+# Effects: Exits if can't open the new bcp file
+# Throws: Nothing
+
+def openBCPFile():
+    global fpSnpMrk
+    global snpMrkFileCtr
+
+    # append this to next bcp filename
+    snpMrkFileCtr = snpMrkFileCtr + 1
+    try:
+        fpSnpMrk = open("%s%s" % (snpMrkFile, snpMrkFileCtr),'w')
+    except:
+        sys.stderr.write('Could not open bcp file: %s\n' % snpMrkFile)
+        sys.exit(1)
+
+    return
+
 # Purpose: Process all SNPs within the startCoord-endCoord range on the given
 #          chr - by using binary search to find sub-regions with a small
 #          enough number of SNPs (< MAX_NUMBER_SNPS) to process at a time
@@ -430,7 +434,8 @@ def processSNPregion(chr, startCoord, endCoord):
             'mc.endCoordinate "markerEnd", ' + \
             'mc.strand "markerStrand" ' + \
             'from MRK_Location_Cache mc ' + \
-            'where mc.chromosome = "%s" and ' + \
+            'where mc._Marker_Type_key != %s and ' % MRKR_QTLTYPE_KEY + \
+            'mc.chromosome = "%s" and ' + \
             'mc.endCoordinate >= %s and ' + \
             'mc.startCoordinate <= %s '
@@ -528,7 +533,7 @@ def processSNPmarkerPair(snp, # dictionary w/ keys as above

     # next available _SNP_ConsensusSnp_Marker_key
     global primaryKey

-    KB_DISTANCE = [ 2, 10, 100, 500, 1000 ]
+    #KB_DISTANCE = [ 2, 10, 100, 500, 1000 ]

     markerStart = marker['markerStart']
     markerEnd = marker['markerEnd']
@@ -539,6 +544,7 @@ def processSNPmarkerPair(snp, # dictionary w/ keys as above

     snpKey = snp['_ConsensusSnp_key']
     featureKey = snp['_Coord_Cache_key']
+    fxnKey = -1

     #
     # The SNP is located within the coordinates of the marker.
     #
@@ -551,25 +557,25 @@ def processSNPmarkerPair(snp, # dictionary w/ keys as above
     # distances from the marker.  Check each distance (starting
     # with the small range) to see which one it is.
     #
-    else:
-        sys.stdout.flush()
-        for kbDist in KB_DISTANCE:
-
-            fxnKey = getKBTerm(snpLoc, markerStart, markerEnd,
-                markerStrand, kbDist)
-
-            #
-            # If the distance has been determined, don't check
-            # any others.
-            #
-            if fxnKey > 0:
-                break
+    #else:
+    #    sys.stdout.flush()
+    #    for kbDist in KB_DISTANCE:
+    #
+    #        fxnKey = getKBTerm(snpLoc, markerStart, markerEnd,
+    #            markerStrand, kbDist)
+    #
+    #        #
+    #        # If the distance has been determined, don't check
+    #        # any others.
+    #        #
+    #        if fxnKey > 0:
+    #            break

     # if fxnKey can't be determined print msg to stdout
     # so it will be logged and return
     if fxnKey == -1:
-        print SNP_NOT_WITHIN % (snp, MARKER_PAD, marker)
-        sys.stdout.flush()
-        return
+        #print SNP_NOT_WITHIN % (snp, MARKER_PAD, marker)
+        #sys.stdout.flush()
+        return

     # check the number of bcp lines in the current file, creating
     # new file if >= the configured max
@@ -645,7 +651,6 @@ def getKBTerm(snpLoc, markerStart, markerEnd, markerStrand, kbDist):
     #
     elif markerStrand == '-' and snpLoc > midPoint:
         direction = 'upstream'
-
     #
     # If the SNP coordinate is <= the midpoint of the marker
     # and strand is Null (MIT marker), the SNP is considered to be upstream.
diff --git a/system_docs/README b/system_docs/README
new file mode 100644
index 0000000..6a84483
--- /dev/null
+++ b/system_docs/README
@@ -0,0 +1 @@
+See dbsnpload/system_docs for documentation
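
For reference, a minimal usage sketch (not part of the patch) of the converted Bourne-shell load. The install path below is illustrative only; it should point at the snpcacheload symlink created by the Install steps, and Configuration must first be copied from Configuration.default and edited as described in the Install comments:

    #!/bin/sh
    # Hypothetical wrapper: archive (-a, -r) and clean (-c, -l) the output
    # and logs directories, then run the weekly snp/marker cache load.
    # The path below is an assumed install location, not defined by this
    # patch; Configuration exports CACHEDIR, ARCHIVEDIR, etc. when sourced.
    cd /usr/local/mgi/dbutils/snpcacheload || exit 1
    . ./Configuration
    ./snpmarker_weekly.sh -a -r -c -l

Because the `getopt arcl` loop in snpmarker.sh and snpmarker_weekly.sh handles each flag independently, any subset of -a, -r, -c, -l may be given; with no flags, the previous run's output and logs are left in place.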