Skip to content

Commit

Permalink
restore master to code from 6-0-22-1 tag
Browse files Browse the repository at this point in the history
  • Loading branch information
sGiannatto committed Apr 18, 2023
1 parent f1b3b56 commit 9262651
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 83 deletions.
22 changes: 13 additions & 9 deletions HISTORY
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
TAG: snpcacheload-6-0-22-3
TAG: snpcacheload-6-0-22-2
DATE: 04/17/2023
TAG: snpcacheload-6-0-22-4
DATE: 04/18/2023
STAFF: sc
CHANGES: IN_SYNC=yes
for some reason 6-0-17-1 has IN_SYNC=yes, but when you checkout
master IN_SYNC=no, so I fixed this.
6-0-22-1 has the current, correct code; master does not. A push may
have been missed when merging the wts2-837 branch to trunk.
This tag restores master to the 6-0-22-1 code.

TAG: snpcacheload-6-0-17-1
DATE: 05/11/2021
TAG: snpcacheload-6-0-22-1
DATE: 10/03/2022
STAFF: sc
CHANGES: TR13349 Build39
wts2-837 remap snps merge to trunk

TAG: snpcacheload-wts2-837-1
DATE: 09/08/2022
STAFF: sc
CHANGES: WTS2-837 remap snps

TAG: snpcacheload-6-0-16-4
TAG: snpcacheload-6-0-16-3
Expand Down
33 changes: 25 additions & 8 deletions snpmarker.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,19 @@ def initialize():

# query for all egId to marker associations
# exclude: withdrawn markers, marker type QTL and Cytogenetic, feature type heritable phenotypic
results = db.sql('''SELECT a.accID AS egId, a._Object_key AS _Marker_key
FROM ACC_Accession a, MRK_Marker m
results = db.sql('''SELECT a.accID AS egId, a._Object_key AS _Marker_key, m.symbol, c.startCoordinate, c.endCoordinate
FROM ACC_Accession a, MRK_Marker m, MRK_Location_Cache c
WHERE a._LogicalDB_key = %s
AND a._MGIType_key = %s
AND a.preferred = 1
AND a._Object_key = m._Marker_key
AND m._Marker_Status_key = 1
AND m._Marker_key = c._Marker_key
''' % (egLdbKey, mrkMgiTypeKey), 'auto' )

print('count of marker/EG records %s\n' % len(results))
for r in results:
markerLookup[r['egId']] = r['_Marker_key']
markerLookup[r['egId']] = [r['_Marker_key'] , r['startCoordinate'], r['endCoordinate'], r['symbol']]

results = db.sql('''select * from SNP_Transcript_Protein''', 'auto')

Expand Down Expand Up @@ -270,20 +271,36 @@ def writeBCP(results):
for r in results:
#print r
# sys.stdout.flush()
rsId = r['rsId']
egId = r['egId']

#print 'egId: %s' % egId
sys.stdout.flush()
#
# if egId is not associated with an MGI marker, skip it
#
if egId not in markerLookup:
print('egId not associated with MGI marker: %s' % egId)
print('egId not associated with MGI marker: %s for %s' % (egId, rsId))
continue

#
# get the marker key for 'egId' and write a line to the bcp file
#
markerKey = markerLookup[ egId ]
snpCoord = r['startCoord'] # the snp coordinate

markerList = markerLookup[ egId ]
markerKey = markerList[0]
markerStart = markerList[1] # the marker start coord
markerEnd = markerList[2] # the marker end coord
mSymbol = markerList[3]
if markerStart == None:
print('No marker coordinate for rsId: %s egId: %s snpCoord: %s markerSymbol: %s markerStart: %s markerEnd: %s'% (rsId, egId, snpCoord, mSymbol, markerStart, markerEnd))
continue
# skip this snp if it lies outside the marker, i.e. snpCoord < markerStart OR snpCoord > markerEnd
if snpCoord < markerStart or snpCoord > markerEnd:
print('snpCoord outside of marker coordinates rsId: %s egId: %s snpCoord: %s markerSymbol: %s markerStart: %s markerEnd: %s' % (rsId, egId, snpCoord, mSymbol, markerStart, markerEnd))
continue


# Then check the refseq transcript(s)????

primaryKey = primaryKey + 1

allele = r['contig_allele']
Expand Down
70 changes: 35 additions & 35 deletions snpmarker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -198,15 +198,15 @@ cd ${CACHEDATADIR}
# Load dbSNP marker relationships
#

date | tee -a ${SNPMARKER_LOG}
echo "Calling snpmarker.py" | tee -a ${SNPMARKER_LOG}
${PYTHON} ${SNPCACHELOAD}/snpmarker.py >> ${SNPMARKER_LOG} 2>&1
STAT=$?
if [ ${STAT} -ne 0 ]
then
echo "snpmarker.py failed" | tee -a ${SNPMARKER_LOG}
exit 1
fi
#date | tee -a ${SNPMARKER_LOG}
#echo "Calling snpmarker.py" | tee -a ${SNPMARKER_LOG}
#${PYTHON} ${SNPCACHELOAD}/snpmarker.py >> ${SNPMARKER_LOG} 2>&1
#STAT=$?
#if [ ${STAT} -ne 0 ]
#then
# echo "snpmarker.py failed" | tee -a ${SNPMARKER_LOG}
# exit 1
#fi

#
# copy in SNP_ConsensusSnp_Marker, truncating and dropping/recreating indexes
Expand All @@ -217,28 +217,28 @@ fi
date | tee -a ${SNPMARKER_LOG}
echo "Truncate and drop indexes on SNP_ConsensusSnp_Marker; drop constraints" | tee -a ${LOG}
${SNP_DBSCHEMADIR}/table/SNP_ConsensusSnp_Marker_truncate.object >> ${SNPMARKER_LOG} 2>&1
${SNP_DBSCHEMADIR}/index/SNP_ConsensusSnp_Marker_drop.object >> ${SNPMARKER_LOG} 2>&1
#${SNP_DBSCHEMADIR}/index/SNP_ConsensusSnp_Marker_drop.object >> ${SNPMARKER_LOG} 2>&1
${SNP_DBSCHEMADIR}/key/SNP_ConsensusSnp_Marker_drop.object >> ${SNPMARKER_LOG} 2>&1
${SNP_DBSCHEMADIR}/key/SNP_ConsensusSnp_drop.object >> ${SNPMARKER_LOG} 2>&1
${SNP_DBSCHEMADIR}/key/SNP_Coord_Cache_drop.object >> ${SNPMARKER_LOG} 2>&1

date | tee -a ${SNPMARKER_LOG}
echo "copy in ${SNP_MRK_TABLE}" | tee -a ${SNPMARKER_LOG}
echo "" | tee -a ${SNPMARKER_LOG}
${PG_DBUTILS}/bin/bcpin.csh ${MGD_DBSERVER} ${MGD_DBNAME} ${SNP_MRK_TABLE} ${CACHEDATADIR} ${SNP_MRK_FILE} ${DL} 'notused' ${SCHEMA} >> ${SNPMARKER_LOG} 2>&1
STAT=$?
echo "snpmarker.sh exit code from bulkLoadPostres ${STAT}"
if [ ${STAT} -ne 0 ]
then
echo "bcpin.csh failed" | tee -a ${SNPMARKER_LOG}
exit 1
fi
#date | tee -a ${SNPMARKER_LOG}
#echo "copy in ${SNP_MRK_TABLE}" | tee -a ${SNPMARKER_LOG}
#echo "" | tee -a ${SNPMARKER_LOG}
#${PG_DBUTILS}/bin/bcpin.csh ${MGD_DBSERVER} ${MGD_DBNAME} ${SNP_MRK_TABLE} ${CACHEDATADIR} ${SNP_MRK_FILE} ${DL} 'notused' ${SCHEMA} >> ${SNPMARKER_LOG} 2>&1
#STAT=$?
#echo "snpmarker.sh exit code from bulkLoadPostres ${STAT}"
#if [ ${STAT} -ne 0 ]
#then
# echo "bcpin.csh failed" | tee -a ${SNPMARKER_LOG}
# exit 1
#fi

# constraints will be added back at the end
date | tee -a ${SNPMARKER_LOG}
echo "Create index on SNP_ConsensusSnp_Marker" | tee -a ${LOG}
echo "" | tee -a ${SNPMARKER_LOG}
${SNP_DBSCHEMADIR}/index/SNP_ConsensusSnp_Marker_create.object >> ${SNPMARKER_LOG} 2>&1
#date | tee -a ${SNPMARKER_LOG}
#echo "Create index on SNP_ConsensusSnp_Marker" | tee -a ${LOG}
#echo "" | tee -a ${SNPMARKER_LOG}
#${SNP_DBSCHEMADIR}/index/SNP_ConsensusSnp_Marker_create.object >> ${SNPMARKER_LOG} 2>&1

#
# Load MGI snp/marker distance relationships
Expand All @@ -256,16 +256,16 @@ then
# Update dbSNP locus-region function class to upstream/downstream
#

date | tee -a ${SNPMARKER_LOG}
echo "Calling snpmrklocus.py" | tee -a ${SNPMARKER_LOG}
echo "" | tee -a ${SNPMARKER_LOG}
${PYTHON} ${SNPCACHELOAD}/snpmrklocus.py >> ${SNPMARKER_LOG} 2>&1
STAT=$?
if [ ${STAT} -ne 0 ]
then
echo "${SNPCACHELOAD}/snpmrklocus.py failed" | tee -a ${SNPMARKER_LOG}
exit 1
fi
#date | tee -a ${SNPMARKER_LOG}
#echo "Calling snpmrklocus.py" | tee -a ${SNPMARKER_LOG}
#echo "" | tee -a ${SNPMARKER_LOG}
#${PYTHON} ${SNPCACHELOAD}/snpmrklocus.py >> ${SNPMARKER_LOG} 2>&1
#STAT=$?
#if [ ${STAT} -ne 0 ]
#then
# echo "${SNPCACHELOAD}/snpmrklocus.py failed" | tee -a ${SNPMARKER_LOG}
# exit 1
#fi

#
# load MGI snp to marker relationships
Expand Down
66 changes: 35 additions & 31 deletions snpmrkwithin.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
#
# Date SE Change Description
# ---------- --- -------------------------------------------------------
# 09/06/2022 sc WTS2-837 remap snp coordinates (b39) - not loading dbSNP marker
# associations, so removed use of excludeDict
# 11/23/2015 sc TR11937/dbSNP 142
# 01/25/2013 lec TR11248/10788 - conversion to postgres
# 09/01/2011 lec TR10805/add _Organism_key = 1
Expand Down Expand Up @@ -119,7 +121,7 @@
user = os.environ['MGD_DBUSER']

# next available _SNP_ConsensusSnp_Marker_key
primaryKey = None
primaryKey = 1

#
# FUNCTIONS
Expand All @@ -137,7 +139,7 @@ def initialize():
#
global fxnLookup # create lookup to resolve function class string to key
global chrList # create list of chromosomes to process
global primaryKey # get next available _SNP_ConsensusSnp_Marker_key
#global primaryKey # get next available _SNP_ConsensusSnp_Marker_key
global snpMrkFile # get bcp file name prefix

print('Perform initialization')
Expand Down Expand Up @@ -183,13 +185,13 @@ def initialize():
#
# Get the max primary key for the SNP_ConsensusSnp_Marker table
#
results = db.sql('''SELECT MAX(_ConsensusSnp_Marker_key) as key
FROM SNP_ConsensusSnp_Marker''', 'auto')
primaryKey = results[1][0][0]
if primaryKey == None:
sys.stderr.write('SNP_ConsensusSnp_Marker table is empty, load dbSNP Marker associations first')
sys.exit(1)
primaryKey += 1
#results = db.sql('''SELECT MAX(_ConsensusSnp_Marker_key) as key
# FROM SNP_ConsensusSnp_Marker''', 'auto')
#primaryKey = results[1][0][0]
#if primaryKey == None:
# sys.stderr.write('SNP_ConsensusSnp_Marker table is empty, load dbSNP Marker associations first')
# sys.exit(1)
#primaryKey += 1
openBCPFile()

return
Expand Down Expand Up @@ -442,25 +444,25 @@ def processSNPregion(chr, startCoord, endCoord):
print('Marker Query end time: %s' % time.strftime("%H.%M.%S.%m.%d.%y", time.localtime(time.time())))
sys.stdout.flush()

print('ExcludeList Query start time: %s' % time.strftime("%H.%M.%S.%m.%d.%y", time.localtime(time.time())))
sys.stdout.flush()
#print('ExcludeList Query start time: %s' % time.strftime("%H.%M.%S.%m.%d.%y", time.localtime(time.time())))
#sys.stdout.flush()

# query to get ExcludeList
ExcludeList = db.sql('''
SELECT cm._ConsensusSnp_key,
cm._Marker_key
FROM SNP_Coord_Cache sc, SNP_ConsensusSnp_Marker cm
WHERE sc.chromosome = '%s'
AND sc.startCoordinate BETWEEN %s AND %s
AND sc._ConsensusSnp_key = cm._ConsensusSnp_key
''' % (chr, startCoord, endCoord), 'auto')

print('ExcludeList Query end time: %s' % time.strftime("%H.%M.%S.%m.%d.%y", time.localtime(time.time())))
sys.stdout.flush()

ExcludeDict = {} # empty the exclude list
for r in ExcludeList[1]:
ExcludeDict[(r[0],r[1])] = 1
#ExcludeList = db.sql('''
# SELECT cm._ConsensusSnp_key,
# cm._Marker_key
# FROM SNP_Coord_Cache sc, SNP_ConsensusSnp_Marker cm
# WHERE sc.chromosome = '%s'
# AND sc.startCoordinate BETWEEN %s AND %s
# AND sc._ConsensusSnp_key = cm._ConsensusSnp_key
# ''' % (chr, startCoord, endCoord), 'auto')

#print('ExcludeList Query end time: %s' % time.strftime("%H.%M.%S.%m.%d.%y", time.localtime(time.time())))
#sys.stdout.flush()

#ExcludeDict = {} # empty the exclude list
#for r in ExcludeList[1]:
# ExcludeDict[(r[0],r[1])] = 1

#
# Process each SNP on SNPlist
Expand All @@ -487,8 +489,8 @@ def processSNPregion(chr, startCoord, endCoord):
leftmostCoord = markerStart-MARKER_PAD
while (i >= 0 and SNPlist[i][2] >= leftmostCoord):

if ( (SNPlist[i][0], markerKey) not in ExcludeDict):
processSNPmarkerPair(SNPlist[i], curMarker)
#if ( (SNPlist[i][0], markerKey) not in ExcludeDict):
processSNPmarkerPair(SNPlist[i], curMarker)
i = i-1
# prevSnpIdx = snpIdx
# end SNP loop
Expand Down Expand Up @@ -544,7 +546,7 @@ def processSNPmarkerPair(snp, # dictionary w/ keys as above

if dirDist == []:
print(SNP_NOT_WITHIN % (snp, MARKER_PAD, marker))
sys.sys.stdout.flush()
sys.stdout.flush()
return
# otherwise direction and distance are set. If fxnKey is not yet set ([0, 'not applicable']),
# then set it
Expand Down Expand Up @@ -633,14 +635,16 @@ def getKBTerm(snpLoc, markerStart, markerEnd, markerStrand):
# If the SNP coordinate is <= the midpoint of the marker
# and strand is Null, the SNP is considered to be proximal
#
elif markerStrand == None and snpLoc <= midPoint:
# '.' strand for VISTA and Ensembl Regulatory features loaded as Gene Models
# because seq_coord_cache does not allow nulls
elif (markerStrand == None or markerStrand == '.') and snpLoc <= midPoint:
direction = 'proximal'
distance = markerStart - snpLoc
#
# If the SNP coordinate is > the midpoint of the marker
# and strand is Null, the SNP is considered to be distal.
#
elif markerStrand == None and snpLoc > midPoint:
elif (markerStrand == None or markerStrand == '.') and snpLoc > midPoint:
direction = 'distal'
distance = snpLoc - markerEnd
else:
Expand Down

0 comments on commit 9262651

Please sign in to comment.