-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeo_htload.config.default
171 lines (126 loc) · 4.79 KB
/
geo_htload.config.default
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#format: sh
#
# geo_htload.config
#
# This file sets up the environment variables needed by the geo_htload load.
#
###########################################################################
###########################################################################
#
# GENERAL SETTINGS
#
###########################################################################
#
# If MGICONFIG is unset or empty (i.e. there is no local override),
# fall back to the default "live" settings and export the value.
#
if [ -z "${MGICONFIG}" ]
then
    MGICONFIG=/usr/local/mgi/live/mgiconfig
    export MGICONFIG
fi
# Source the master configuration from the MGICONFIG directory. The path is
# quoted so that a value containing whitespace or glob characters is passed
# to "." as a single, literal file name.
. "${MGICONFIG}/master.config.sh"
###########################################################################
#
# LOAD SPECIFIC SETTINGS
#
###########################################################################
# Top-level directory for this load's files, located under DATALOADSOUTPUT
FILEDIR="${DATALOADSOUTPUT}/mgi/geo_htload"
export FILEDIR
# Sub-directories of FILEDIR: input, output, reports, logs and archive
INPUTDIR="${FILEDIR}/input"
OUTPUTDIR="${FILEDIR}/output"
RPTDIR="${FILEDIR}/reports"
LOGDIR="${FILEDIR}/logs"
ARCHIVEDIR="${FILEDIR}/archive"
export INPUTDIR OUTPUTDIR RPTDIR LOGDIR ARCHIVEDIR
# QC report path under RPTDIR
QC_RPT="${RPTDIR}/geo_htload_qc"
export QC_RPT
# Curated QC report path; the code adds a date.rpt extension to this name
CURATED_QC_RPT="${RPTDIR}/geo_htload_curated_qc"
export CURATED_QC_RPT
# Parsing report paths under RPTDIR
EXP_PARSING_RPT="${RPTDIR}/geo_experiment_parsing.rpt"
SAMP_PARSING_RPT="${RPTDIR}/geo_sample_parsing.rpt"
SAMP_IN_DB_PARSING_RPT="${RPTDIR}/geo_sample_in_db_parsing.rpt"
export EXP_PARSING_RPT SAMP_PARSING_RPT SAMP_IN_DB_PARSING_RPT
# Set to true if you want to run the parsing reports
RUN_PARSING_RPTS=true
# Maximum number of samples to load; if an experiment has more, it is skipped
MAX_SAMPLES=1000
# Current date in YYYY-MM-DD form, evaluated once when this file is sourced.
# Uses $(...) rather than legacy backticks for command substitution.
DATE=$(date '+%Y-%m-%d')
export RUN_PARSING_RPTS MAX_SAMPLES DATE
# Names of the BCP files (file names only, no directory component)
EXPERIMENT_FILENAME="GXD_HTExperiment.bcp"
VARIABLE_FILENAME="GXD_HTExperimentVariable.bcp"
SAMPLE_FILENAME="GXD_HTRawSample.bcp"
export EXPERIMENT_FILENAME VARIABLE_FILENAME SAMPLE_FILENAME
ACC_FILENAME="ACC_Accession.bcp"
PROPERTY_FILENAME="MGI_Property.bcp"
KEYVALUE_FILENAME="MGI_KeyValue.bcp"
NOTE_FILENAME="MGI_Note.bcp"
export ACC_FILENAME PROPERTY_FILENAME KEYVALUE_FILENAME NOTE_FILENAME
# Full path of the SQL delete file, located in OUTPUTDIR
DELETE_FILENAME="${OUTPUTDIR}/sampleDelete.sql"
export DELETE_FILENAME
# Full paths of the log files, all located in LOGDIR
LOG_FILE="${LOGDIR}/geo_htload.log"
export LOG_FILE
LOG_PROC="${LOGDIR}/geo_htload.proc.log"
export LOG_PROC
LOG_DIAG="${LOGDIR}/geo_htload.diag.log"
export LOG_DIAG
LOG_CUR="${LOGDIR}/geo_htload.cur.log"
export LOG_CUR
LOG_VAL="${LOGDIR}/geo_htload.val.log"
export LOG_VAL
#
# Mirroring of GEO experiment and sample files
#
# Name of the experiment XML file
EXPT_XML_FILE="geo.xml"
export EXPT_XML_FILE
# Download directory under DATADOWNLOADS, and its ".archive" counterpart
GEO_DOWNLOADS="${DATADOWNLOADS}/eutils.ncbi.nlm.nih.gov.geo"
GEO_DOWNLOADS_ARCHIVE=${GEO_DOWNLOADS}.archive
export GEO_DOWNLOADS GEO_DOWNLOADS_ARCHIVE
# Number of days to go back when fetching experiments (by release date).
# If this variable does not exist or is empty ("") then all experiments
# are downloaded.
EXPT_DOWNLOAD_DAYS=730
#EXPT_DOWNLOAD_DAYS=""
export EXPT_DOWNLOAD_DAYS
# GEO UID file; it is parsed for the values used to get GEO_XML_FILE
GEO_UID_FILE="${INPUTDIR}/geo.uid.xml"
export GEO_UID_FILE
# GEO experiment XML file prefix (GEO_XML_FILE.1, .2 and so on)
GEO_XML_FILE="${GEO_DOWNLOADS}/${EXPT_XML_FILE}"
export GEO_XML_FILE
# Suffix used as the template for GEO sample file names
GEO_SAMPLE_FILE_SUFFIX="_family.xml"
export GEO_SAMPLE_FILE_SUFFIX
# GEO report file
GEO_RPT_FILE="${RPTDIR}/geoUpdate.rpt"
export GEO_RPT_FILE
# Mirror log file
GEO_MIRROR_LOG_FILE="${LOGDIR}/mirror_geo.log"
export GEO_MIRROR_LOG_FILE
# Curator mirror log file - indicates when sample files cannot be downloaded
MIRROR_LOG_CUR="${LOGDIR}/mirror_geo.cur.log"
export MIRROR_LOG_CUR
# Whether debug messages are sent to the diagnostic log (true or false)
LOG_DEBUG="false"
# Job stream name for the load
JOBSTREAM="geo_htload"
export LOG_DEBUG JOBSTREAM
###########################################################################
#
# GXD HT Classifier Configuration
#
###########################################################################
# Input file for the prediction script; it is created by processRelevance.py
NOT_EVALUATED_EXPERIMENT="${OUTPUTDIR}/notevaluated.experiment.txt"
export NOT_EVALUATED_EXPERIMENT
# Output file of the prediction script (.../lib/python_anaconda/predict.py)
PREDICTED_EXPERIMENT="${OUTPUTDIR}/predicted.experiment.txt"
export PREDICTED_EXPERIMENT
# Log file for the classifier post processing steps
LOG_EXPERIMENT="${LOGDIR}/predicted.log"
export LOG_EXPERIMENT
###########################################################################
#
# MISCELLANEOUS SETTINGS
#
###########################################################################
# Load name used as the subject of an email notification
MAIL_LOADNAME='GEO HT load'
export MAIL_LOADNAME
# Comma-separated recipient lists for mailing the log files
MAIL_LOG_PROC='mgiadmin'
export MAIL_LOG_PROC
# NOTE(review): this value looks like redacted placeholder text rather than
# real addresses - confirm the intended recipients before relying on it.
MAIL_LOG_CUR="[email protected],[email protected]"
export MAIL_LOG_CUR
# Installation directory, taken from GXDHTLOAD
INSTALLDIR="${GXDHTLOAD}"
export INSTALLDIR