-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathimport_genetests_data.php
executable file
·139 lines (116 loc) · 3.68 KB
/
import_genetests_data.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/php
<?php
;
// Copyright: see COPYING
// Authors: see git-blame(1)
if ($_SERVER["argc"] != 2)
{
die ("Usage: ".$_SERVER["argv"][0]." genetests-data.txt\n");
}
$fh = fopen ($_SERVER["argv"][1], "r");
chdir ('public_html');
require_once 'lib/setup.php';
if (ini_get ("memory_limit") < 134217728)
ini_set ("memory_limit", 134217728);
print "Creating/updating get-evidence tables...";
evidence_create_tables ();
print "\n";
print "Reading input...";
$g_sql = "";
$g_sql_param = array();
$in = 0;
$out = 0;
while (($line = fgets ($fh)) !== FALSE) {
if (ereg ("^#", $line)) // comment
continue;
++$in;
$f = explode ("\t", rtrim($line, "\n"));
if ($f[4] == "na") // no gene listed
continue;
$testable = eregi ("clinical", $f[5]) ? 1 : 0;
$reviewed = $f[6] && $f[6] != "na" ? 1 : 0;
foreach (explode ("|", $f[1]) as $disease) {
foreach (explode ("|", $f[4]) as $gene) {
$g_sql .= "(?, ?, ?, ?), ";
array_push ($g_sql_param, $gene, $disease, $testable, $reviewed);
++$out;
}
}
}
print "$in inputs, $out outputs\n";
if (!$out)
exit;
print "Importing to database...";
theDb()->query ("CREATE TEMPORARY TABLE gt (
gene VARCHAR(32) NOT NULL,
disease VARCHAR(64) NOT NULL,
testable TINYINT NOT NULL,
reviewed TINYINT NOT NULL,
UNIQUE `gene_disease` (gene,disease))");
$q = theDb()->query ("INSERT IGNORE INTO gt (gene, disease, testable, reviewed) VALUES "
.ereg_replace(', $', '', $g_sql),
$g_sql_param);
if (theDb()->isError($q)) die($q->getMessage());
print theDb()->affectedRows();
print "\n";
print "Adding diseases...";
theDb()->query ("INSERT IGNORE INTO diseases (disease_name) SELECT disease FROM gt");
print theDb()->affectedRows();
print "\n";
print "Looking up disease IDs...";
theDb()->query ("ALTER TABLE gt ADD disease_id BIGINT NOT NULL");
theDb()->query ("UPDATE gt
LEFT JOIN diseases d
ON gt.disease = d.disease_name
SET gt.disease_id = d.disease_id");
print theDb()->affectedRows();
print "\n";
theDb()->query ("UNLOCK TABLES");
print "Copying to live gene>disease table...";
theDb()->query ("LOCK TABLES gene_disease,gene_canonical_name WRITE");
theDb()->query ("DELETE FROM gene_disease WHERE dbtag = ?",
array ("GeneTests"));
$q = theDb()->query ("INSERT INTO gene_disease
(gene, disease_id, dbtag)
SELECT DISTINCT IF(official is null,gene,official), disease_id, ? FROM gt
LEFT JOIN gene_canonical_name ON aka=gene",
array ("GeneTests"));
if (theDb()->isError($q)) die($q->getMessage());
print theDb()->affectedRows();
print "\n";
theDb()->query ("UNLOCK TABLES");
print "Merging genes using gene_canonical_name...";
theDb()->query ("CREATE TEMPORARY TABLE gt2 (
gene VARCHAR(32) NOT NULL,
testable TINYINT NOT NULL,
reviewed TINYINT NOT NULL,
INDEX(gene))");
$q = theDb()->query ("INSERT INTO gt2
(gene, testable, reviewed)
SELECT gene, testable, reviewed
FROM gt
LEFT JOIN gene_canonical_name ON gene=aka
WHERE aka IS NULL");
if (theDb()->isError($q)) die($q->getMessage());
print theDb()->affectedRows();
print "...";
$q = theDb()->query ("INSERT INTO gt2
(gene, testable, reviewed)
SELECT official, testable, reviewed
FROM gt
LEFT JOIN gene_canonical_name ON gene=aka
WHERE official IS NOT NULL");
if (theDb()->isError($q)) die($q->getMessage());
print theDb()->affectedRows();
print "\n";
print "Copying to live genetests table...";
theDb()->query ("LOCK TABLES genetests WRITE");
theDb()->query ("DELETE FROM genetests");
$q = theDb()->query ("INSERT INTO genetests
(gene, testable, reviewed)
SELECT gene, max(testable), max(reviewed) FROM gt2 GROUP BY gene");
if (theDb()->isError($q)) die($q->getMessage());
print theDb()->affectedRows();
print "\n";
theDb()->query ("UNLOCK TABLES");
?>