-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnpmrklocus.py
executable file
·315 lines (270 loc) · 8.03 KB
/
snpmrklocus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# snpmrklocus.py
###########################################################################
#
# Purpose:
#
# This script will check records in the SNP_ConsensusSnp_Marker table
# where SNP/marker pairs have been annotated to the "locus-region"
# SNP function class and determine whether the annotation should be
# upstream or downstream, depending on the SNP/marker coordinates.
#
# Usage:
#
# snpmrklocus.py
#
# Env Vars: None
#
# Inputs:
#
# The following tables in the SNP database are used as input:
#
# 1) SNP_ConsensusSnp_Marker
# 2) SNP_Coordinate_Cache
# 3) MRK_Location_Cache
#
# Outputs:
#
# A "|" delimited bcp file to load records into a temporary table.
#
# Exit Codes:
#
# 0: Successful completion
# 1: An exception occurred
#
# Assumes: Nothing
#
# Notes: None
#
###########################################################################
#
# Modification History:
#
# Date SE Change Description
# ---------- --- -------------------------------------------------------
# 11/23/2015 sc TR11937/dbSNP 142
#
# 01/25/2013 lec TR11248/TR10778 convert to postgres
#
# 04/20/2012 sc TR10778 convert to postgres
#
# 09/01/2011 lec TR10805/add _Organism_key = 1
#
# 06/30/2006 lec modified for mgiconfig
#
# 03/21/2006 sc updated locus-region upstream/downstream algorithm tr7563
# MGI3.44
# 09/28/2005 DBM Initial development
#
###########################################################################
import sys
import os
import loadlib
import db
import io
#
# CONSTANTS
#
DL = '|'
CRT = '\n'
NULL = ''
LOCUS_REGION_TERM = 'Locus-Region'
UPSTREAM_TERM = 'upstream'
DOWNSTREAM_TERM = 'downstream'
# _Term_key for 'Locus-Region' function class
locusRegionKey = 0
# database environment variables
server = os.environ['MGD_DBSERVER']
database = os.environ['MGD_DBNAME']
user = os.environ['MGD_DBUSER']
#print server
#print database
# lookup to resolve function class str.to key
fxnLookup = {}
#
# FUNCTIONS
#
# Purpose: Perform initialization for the script.
# Returns: Nothing
# Assumes: Nothing
# Effects: Nothing
# Throws: Nothing
def initialize():
global locusRegionKey
global fxnLookup
global tmpFxnTable, tmpFxnFile, fpTmpFxn
print('Perform initialization')
sys.stdout.flush()
#
# Initialize variables.
#
dataDir = os.environ['CACHEDATADIR']
tmpFxnTable = os.environ['TMP_FXN_TABLE']
tmpFxnFile = dataDir + '/' + os.environ['TMP_FXN_FILE']
#
# Set up a connection to the mgd database.
#
db.useOneConnection(1)
db.setReturnAsMGI(False)
results = db.sql('''
SELECT t._Term_key
FROM VOC_Term t
WHERE t._Vocab_key = 49
AND t.term = '%s'
''' % (LOCUS_REGION_TERM), 'auto')
locusRegionKey = results[1][0]
#
# Open the bcp file.
#
try:
fpTmpFxn = open(tmpFxnFile,'w')
except:
sys.stderr.write('Could not open bcp file: %s\n' % tmpFxnFile)
sys.exit(1)
return
# Purpose: Perform cleanup steps for the script.
# Returns: Nothing
# Assumes: Nothing
# Effects: Nothing
# Throws: Nothing
def finalize():
db.useOneConnection(0)
return
# Purpose: Create a bcp file that contains the primary key for each
# "locus-region" record, along with the key for the new function
# class that should be used to update each record.
# Returns: Nothing
# Assumes: Nothing
# Effects: Nothing
# Throws: Nothing
def createBCPFile():
global fpTmpFxn
print('Get locus-region SNP/marker annotations')
sys.stdout.flush()
results = db.sql('''
SELECT sm._ConsensusSnp_Marker_key,
sc.startCoordinate as snpLoc,
mc.startCoordinate as markerStart,
mc.endCoordinate as markerEnd,
mc.strand as markerStrand
FROM SNP_ConsensusSnp_Marker sm,
SNP_Coord_Cache sc,
MRK_Location_Cache mc
WHERE sm._ConsensusSnp_key = sc._ConsensusSnp_key
AND sm._Coord_Cache_key = sc._Coord_Cache_key
AND sm._Marker_key = mc._Marker_key
AND mc._Organism_key = 1
AND sm._Fxn_key = %s
AND mc.startCoordinate IS NOT NULL
AND mc.endCoordinate IS NOT NULL
''' % (locusRegionKey), 'auto')
print('Create the bcp file')
sys.stdout.flush()
for r in results[1]:
primaryKey = r[0]
snpLoc = r[1]
markerStart = r[2]
markerEnd = r[3]
markerStrand = r[4]
#
# Find the midpoint of the marker.
#
midPoint = (markerStart + markerEnd) / 2.0
#
# If the SNP coordinate is <= the midpoint of the marker on a
# "+" strand, the SNP is considered to be upstream.
#
if markerStrand == '+' and snpLoc <= midPoint:
direction = 'upstream'
#
# If the SNP coordinate is > the midpoint of the marker on a
# "+" strand, the SNP is considered to be downstream.
#
elif markerStrand == '+' and snpLoc > midPoint:
direction = 'downstream'
#
# If the SNP coordinate is <= the midpoint of the marker on a
# "-" strand, the SNP is considered to be downstream.
#
elif markerStrand == '-' and snpLoc <= midPoint:
direction = 'downstream'
#
# If the SNP coordinate is > the midpoint of the marker on a
# "-" strand, the SNP is considered to be upstream.
#
elif markerStrand == '-' and snpLoc > midPoint:
direction = 'upstream'
#
# If the SNP coordinate is <= the midpoint of the marker
# and strand is Null, the SNP is considered to be proximal
#
elif markerStrand == None and snpLoc <= midPoint:
direction = 'proximal'
#
# If the SNP coordinate is > the midpoint of the marker
# and strand is Null, the SNP is considered to be downstream.
#
elif markerStrand == None and snpLoc > midPoint:
direction = 'distal'
else:
print('not covered by algorithm')
print(' primaryKey: %s snpLoc: %s markerStart: %s markerEnd: %s markerStrand: %s' % ( primaryKey, snpLoc, markerStart, markerEnd, markerEnd))
fpTmpFxn.write(str(primaryKey) + DL + direction + CRT)
#
# Close the bcp file.
#
fpTmpFxn.close()
return
# Purpose: Load the bcp file into a new temp table.
# Returns: Nothing
# Assumes: Nothing
# Effects: Nothing
# Throws: Nothing
def loadBCPFile():
global tmpFxnTable, tmpFxnFile
print('Create the temp table')
sys.stdout.flush()
db.sql('''
CREATE TEMPORARY TABLE %s
(_ConsensusSnp_Marker_key int not null,
direction varchar not null
)
''' % (tmpFxnTable), None)
print('Load the bcp file into the temp table')
sys.stdout.flush()
tmpFile = open(tmpFxnFile, 'r')
db.executeCopyFrom(tmpFile, tmpFxnTable, DL)
db.commit()
print('Create indexes on the temp table')
sys.stdout.flush()
db.sql('CREATE index idx1 on %s (_ConsensusSnp_Marker_key)' % (tmpFxnTable), None)
db.sql('CREATE index idx2 on %s (direction)' % (tmpFxnTable), None)
# Purpose: Update the function classes using the keys in the temp table.
# Returns: Nothing
# Assumes: Nothing
# Effects: Nothing
# Throws: Nothing
def applyUpdates():
global tmpFxnTable
print('Update the distance direction')
sys.stdout.flush()
db.sql('''
UPDATE SNP_ConsensusSnp_Marker sm
SET distance_direction = t.direction
FROM %s t
WHERE sm._ConsensusSnp_Marker_key = t._ConsensusSnp_Marker_key
''' % (tmpFxnTable), 'auto')
results = db.sql('''
SELECT t.*
FROM SNP_ConsensusSnp_Marker sm, %s t
WHERE sm._ConsensusSnp_Marker_key = t._ConsensusSnp_Marker_key
''' % (tmpFxnTable), 'auto')
db.commit()
#
# MAIN
#
initialize()
createBCPFile()
loadBCPFile()
applyUpdates()
finalize()
sys.exit(0)