-
Notifications
You must be signed in to change notification settings - Fork 8
Web API Guide
Access the UniProt documentation here.
Requirements for fetching data: taxon and symbol.
/**
 * Fetches the UniProt XML entry for a gene in two requests: first it maps the
 * gene symbol to an accession through the "uploadlists" endpoint, then it
 * requests "<accession>.xml".
 *
 * @param {Object} query
 * @param {string} query.taxon - taxon ID sent as the `taxon` request parameter.
 * @param {string} query.symbol - gene symbol sent as the `query` request parameter.
 * @returns {Object} the promise-like object produced by the `$.get(...).then(...).fail(...)`
 *     chain (presumably a jQuery promise); it carries the result of the second request.
 * @throws {Error} from inside the `.then` callback when the mapping response contains
 *     no accession for the symbol. NOTE(review): whether this surfaces as a rejection
 *     or a synchronous exception depends on the promise implementation behind `$` — confirm.
 */
let getUniProtInfo = function (query) {
    const taxon = query.taxon;
    const geneSymbol = query.symbol;
    return $.get({
        url: serviceRoot + "/uniprot/uploadlists/",
        data: {
            from: "GENENAME",
            to: "ACC",
            format: "tab",
            taxon: taxon,
            query: geneSymbol,
        },
        dataType: "text",
        timeout: 5000,
    }).then(function (data) {
        // Escape the symbol so characters such as "." or "(" are matched literally
        // instead of being interpreted as regular-expression syntax.
        const escapedSymbol = String(geneSymbol).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // The accession is the run of A-Z/0-9 that follows the symbol and optional whitespace.
        const regex = new RegExp(escapedSymbol + "[ \t\r\n\v\f]*([A-Z0-9]+)");
        const match = regex.exec(data);
        if (match === null) {
            // Previously this case failed with "Cannot read properties of null".
            throw new Error("UniProt returned no accession for gene symbol \"" + geneSymbol + "\"");
        }
        return $.get({
            url: serviceRoot + "/uniprot/uniprot/" + match[1] + ".xml",
            timeout: 5000,
        });
    }).fail(function () {
        // NOTE(review): this re-issues a request using `this` as the settings object,
        // but the value returned here does not replace the result of the chain —
        // confirm whether a real retry was intended.
        return $.get(this);
    });
};
- UniProt ID
- Protein Sequence
- Protein Type
- Species Name
Access the NCBI Documentation here.
Requirements for fetching data as of beta # ( ): symbol and species name.
/**
 * Fetches the NCBI gene summary for a gene in two requests: an `esearch` call
 * that resolves the gene symbol and organism to an `<Id>` value, followed by an
 * `esummary` call for that id.
 *
 * @param {Object} query
 * @param {string} query.symbol - gene symbol, used as the `[gene]` search term.
 * @param {string} query.species - organism name; underscores are converted to "+"
 *     before it is used as the `[Organism]` search term.
 * @returns {Object} the promise-like object produced by `$.get(...).then(...)`
 *     (presumably a jQuery promise); it carries the result of the `esummary` request.
 * @throws {Error} from inside the `.then` callback when the search response has no
 *     numeric `<Id>` element. NOTE(review): whether this surfaces as a rejection or
 *     a synchronous exception depends on the promise implementation behind `$` — confirm.
 */
let getNCBIInfo = function (query) {
    const geneSymbol = query.symbol;
    // Replace every underscore, not just the first, so multi-part names are fully converted.
    const organism = query.species.replace(/_/g, "+");
    return $.get({
        url: serviceRoot + "/ncbi/entrez/eutils/esearch.fcgi",
        data: {
            db: "gene",
            term: geneSymbol + "[gene]+" + organism + "[Organism]",
        },
        dataType: "text",
        timeout: 5000,
    }).then(function (data) {
        // Require at least one digit so an empty <Id></Id> is treated as "not found".
        const match = /<Id>(\d+)<\/Id>/.exec(data);
        if (match === null) {
            // Previously this case failed with "Cannot read properties of null".
            throw new Error(
                "NCBI returned no gene id for \"" + geneSymbol + "\" in \"" + query.species + "\""
            );
        }
        return $.get({
            url: serviceRoot + "/ncbi/entrez/eutils/esummary.fcgi?db=gene&id=" + match[1],
            dataType: "xml",
            timeout: 5000,
        });
    });
};
- NCBI ID
- Locus Tag
- Alternative Name
- Chromosome Sequence
- Genomic Sequence
After browsing the capabilities of the NCBI API, I conclude it is not possible to directly retrieve gene data from strictly the taxon ID.
According to the documentation, the way to access gene information through the NCBI database is by providing an Entrez Unique Identifier (UID).
This is the purpose of the first get() function:
This would require us knowing both the gene name and the organism name in advance though, which, presently, are both passed into the gene page as the page is created.
return $.get({
url: serviceRoot + "/ncbi/entrez/eutils/esearch.fcgi",
data: {
db: "gene",
term: geneSymbol + "[gene]+" + geneName + "[Organism]",
},
dataType: "text",
timeout: 5000,
}).
The result of this function is a page of XML data, which we use to get the UID.
Example query: YHP1
<eSearchResult>
<Count>1</Count>
<RetMax>1</RetMax>
<RetStart>0</RetStart>
<IdList>
**_<Id>852062</Id>_** <!-- We want this -->
</IdList>
<TranslationSet>
<Translation>
<From>+Saccharomyces+cerevisiae[Organism]</From>
<To>"Saccharomyces cerevisiae"[Organism]</To>
</Translation>
</TranslationSet>
<TranslationStack>
<TermSet>
<Term>YHP1[gene]</Term>
<Field>gene</Field>
<Count>1</Count>
<Explode>N</Explode>
</TermSet>
<TermSet>
<Term>"Saccharomyces cerevisiae"[Organism]</Term>
<Field>Organism</Field>
<Count>7062</Count>
<Explode>Y</Explode>
</TermSet>
<OP>AND</OP>
</TranslationStack>
<QueryTranslation>
YHP1[gene] AND "Saccharomyces cerevisiae"[Organism]
</QueryTranslation>
</eSearchResult>
We take that value, and put it into a SECOND get() function to retrieve the gene data we want:
return $.get({
url: serviceRoot + "/ncbi/entrez/eutils/esummary.fcgi?db=gene&id=" + id,
dataType: "xml",
timeout: 5000,
});
As stated in my previous comment, it is possible to retrieve the species name and the gene name by accessing the taxonomy database, which would give us the organism name. It would require a third get() function to be created, however, which isn't difficult but would slow the page down slightly.
Access the Ensembl documentation here.
Requirements for API function as of beta version # (): symbol and species.
/**
 * Requests the Ensembl symbol-lookup record for a gene as JSON.
 *
 * @param {Object} query
 * @param {string} query.symbol - gene symbol placed in the lookup path.
 * @param {string} query.species - species name placed in the lookup path.
 * @returns {Object} whatever `$.get` returns for the request (presumably a jQuery promise).
 */
let getEnsemblInfo = function (query) {
    const { symbol, species } = query;
    const lookupUrl =
        `${serviceRoot}/ensembl/lookup/symbol/${species}/${symbol}?content-type=application/json`;
    const settings = { url: lookupUrl, dataType: "json", timeout: 5000 };
    return $.get(settings);
};
- Ensembl ID
- Ensembl Description
As of 1/28/19, the Ensembl database does not offer the requested data for _Saccharomyces cerevisiae_. This leads future GRNsight researchers to question the viability of using Ensembl as a future data source.
It was discussed in Issue #696 that the most important thing to obtain for Ensembl was the link to the Ensembl page. To access the Ensembl page for a given gene, you use the following link:
https://uswest.ensembl.org/Saccharomyces_cerevisiae/Gene/Summary?db=core;g= + locus tag
.
where the locus tag may be obtained from the NCBI API call.
Access the JASPAR API documentation [here](http://jaspar.genereg.net/api/).
Requirements for the JASPAR API are: symbol, taxon.
/**
 * Fetches the JASPAR matrix record for a gene in two requests: a matrix search
 * by upper-cased gene symbol, then a detail request for the first hit's `matrix_id`.
 *
 * @param {Object} query
 * @param {string} query.symbol - gene symbol; upper-cased before it is sent as `name`.
 * @returns {Object} the promise-like object produced by `$.get(...).then(...)`
 *     (presumably a jQuery promise); it carries either the detail response or an
 *     empty object `{}` when the search has no results.
 */
let getJasparInfo = function (query) {
    const geneSymbol = query.symbol;
    //will eventually need to decide which taxon to use for JASPAR, for now this remains hardcoded
    const taxon = "4932";
    // Both requests send the same content-type header.
    const setJsonContentType = function (xhr) {
        xhr.setRequestHeader("content-type", "application/json");
    };
    return $.get({
        url: serviceRoot + "/jaspar/api/v1/matrix/?tax_id=" + taxon + "&format=json&name=" + geneSymbol.toUpperCase(),
        dataType: "json",
        beforeSend: setJsonContentType,
    }).then(function (data) {
        // Check that `results` exists before reading its length; the original order
        // threw a TypeError whenever `results` was missing.
        const results = data ? data.results : undefined;
        if (!Array.isArray(results) || results.length === 0) {
            return {};
        }
        return $.get({
            url: serviceRoot + "/jaspar/api/v1/matrix/" + results[0].matrix_id,
            dataType: "json",
            beforeSend: setJsonContentType,
        });
    });
};
- JASPAR ID
- Class
- Family
- Sequence Logo
- Frequency Matrix
Within our API, the taxon is hardcoded as 4932. As of 1/28/19, the taxon for the rest of the APIs is 559292. When 559292 was tested as of 1/28/19 as a taxon, the API response lacked the data we seek.
Taxon ID 4932 is referring to the species Saccharomyces cerevisiae, see https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=4932, while taxon ID 559292 is a substrain of 4932, see https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=559292.
If you look at the first link, you will see that the strain S288C is included in the long list of substrains.
What is special about S288C is that it was the first strain to be sequenced completely, so it is the reference genome for yeast. However, now that many more substrains have been completely sequenced, the databases had to be able to distinguish between them.
So, in the case of our gene pages, all the data we are providing are still for the same species. Since UniProt pre-dates the time when separate sub-strain designations were made, it still uses the parent ID, 4932. However, JASPAR is referring to the sequenced strain.
This leads future GRNsight researchers to question which taxon will be used to pull data from JASPAR.
Access the YeastMine documentation [here](https://github.com/yeastgenome/SGDBackend-Nex2/blob/master/docs/webservice.MD).
All of the YeastMine functions require: symbol.
/**
 * Requests the gene ontology details ("go_details") for a gene as JSON and
 * substitutes `defaultGeneOntology` when the request chain fails.
 *
 * @param {Object} query
 * @param {string} query.symbol - gene symbol placed in the locus path.
 * @returns {Object} the result of calling `.catch(...)` on what `$.get` returns
 *     (presumably a jQuery promise).
 */
let getGeneOntologyInfo = function (query) {
    const { symbol } = query;
    const request = $.get({
        url: `${serviceRoot}/yeastmine/backend/locus/${symbol}/go_details`,
        dataType: "json",
        beforeSend(xhr) {
            xhr.setRequestHeader("content-type", "application/json");
        },
    });
    // On failure, hand back the fallback value instead of the error.
    return request.catch(() => defaultGeneOntology);
};
/**
 * Requests the regulation details ("regulation_details") for a gene as JSON.
 *
 * @param {Object} query
 * @param {string} query.symbol - gene symbol placed in the locus path.
 * @returns {Object} whatever `$.get` returns for the request (presumably a jQuery promise).
 */
let getRegulationInfo = function (query) {
    const locus = query.symbol;
    // Header hook applied to the outgoing request.
    const addJsonHeader = (xhr) => {
        xhr.setRequestHeader("content-type", "application/json");
    };
    const endpoint = `${serviceRoot}/yeastmine/backend/locus/${locus}/regulation_details`;
    return $.get({ url: endpoint, dataType: "json", beforeSend: addJsonHeader });
};
/**
 * Requests the YeastMine webservice locus record for a gene as JSON.
 *
 * @param {Object} query
 * @param {string} query.symbol - gene symbol appended to the locus path.
 * @returns {Object} whatever `$.get` returns for the request (presumably a jQuery promise).
 */
let getYeastMineInfo = function (query) {
    const { symbol: locusName } = query;
    const settings = {
        dataType: "json",
        url: `${serviceRoot}/yeastmine/webservice/locus/${locusName}`,
        beforeSend(xhr) {
            // Sent with every request made by this function.
            xhr.setRequestHeader("content-type", "application/json");
        },
    };
    return $.get(settings);
};
- Gene Ontology Data:
  - Molecular Functions
  - Biological Processes
  - Cellular Components
- Regulators Data:
  - List of Regulators
  - List of Targets
- Description
- SGD ID
- Standard Name (not used in page)
- Systematic Name (not used in page)
- Gene Ontology Overview