Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCRUM-4190 #1714

Merged
merged 10 commits into from
Nov 20, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import org.alliancegenome.curation_api.exceptions.ObjectUpdateException;
import org.alliancegenome.curation_api.model.entities.GeneOntologyAnnotation;
import org.alliancegenome.curation_api.model.entities.Organization;
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkFMSLoad;
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkLoadFileHistory;
import org.alliancegenome.curation_api.model.entities.bulkloads.BulkURLLoad;
import org.alliancegenome.curation_api.model.ingest.dto.GeneOntologyAnnotationDTO;
import org.alliancegenome.curation_api.services.GeneOntologyAnnotationService;
import org.alliancegenome.curation_api.services.OrganizationService;
Expand All @@ -33,16 +33,11 @@ public class GeneOntologyAnnotationExecutor extends LoadFileExecutor {

public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException {

String url = ((BulkURLLoad) bulkLoadFileHistory.getBulkLoad()).getBulkloadUrl();

String[] tok = url.split("/");
String orgAbbrev = tok[tok.length - 1].toUpperCase();
String abbr = orgAbbrev.split("\\.")[0];
String abbr = ((BulkFMSLoad) bulkLoadFileHistory.getBulkLoad()).getFmsDataSubType();
Organization organization = organizationService.getByAbbr(abbr).getEntity();

// curie, List<GO curie>
Map<String, List<String>> uiMap = new HashMap<>();
Set<String> orgIDs = new HashSet<>();
GZIPInputStream stream = new GZIPInputStream(new FileInputStream(bulkLoadFileHistory.getBulkLoadFile().getLocalFilePath()));
try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) {
Stream<String> lines = br.lines();
Expand All @@ -51,10 +46,9 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException
lines.filter(s -> !s.startsWith("!") && StringUtils.isNotEmpty(s)).forEach(s -> {
String[] token = s.split("\t");
String orgID = token[0];
orgIDs.add(orgID);
String modID = token[1];
String goID = token[4];
if (abbr.equals(orgID)) {
if (abbr.equalsIgnoreCase(orgID) || orgID.equalsIgnoreCase("Xenbase") || abbr.equals("HUMAN") && orgID.equals("RGD")) {
List<String> goIDs = uiMap.computeIfAbsent(modID, list -> new ArrayList<>());
goIDs.add(goID);
}
Expand All @@ -77,7 +71,15 @@ public void execLoad(BulkLoadFileHistory bulkLoadFileHistory) throws IOException
.stream()
.map(entry -> entry.getValue().stream().map(goID -> {
GeneOntologyAnnotationDTO dto = new GeneOntologyAnnotationDTO();
dto.setGeneIdentifier(abbr + ":" + entry.getKey());
String prefix = abbr;
if (abbr.equalsIgnoreCase("XB")) {
prefix = "Xenbase";
}
if (abbr.equalsIgnoreCase("HUMAN")) {
prefix = null;
}
String geneIdentifier = prefix != null ? prefix + ":" + entry.getKey() : entry.getKey();
dto.setGeneIdentifier(geneIdentifier);
dto.setGoTermCurie(goID);
return dto;
}).toList()).flatMap(Collection::stream).toList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@
import org.alliancegenome.curation_api.response.ObjectResponse;
import org.alliancegenome.curation_api.services.base.BaseEntityCrudService;
import org.alliancegenome.curation_api.services.validation.DataProviderValidator;
import org.apache.commons.collections.CollectionUtils;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@RequestScoped
public class GeneOntologyAnnotationService extends BaseEntityCrudService<GeneOntologyAnnotation, GeneOntologyAnnotationDAO> {

private Species species;
private List<Species> species;
public static final String RESOURCE_DESCRIPTOR_PREFIX = "ENSEMBL";
public static final String RESOURCE_DESCRIPTOR_PAGE_NAME = "default";
// <crossReference.referencedCurie, DataProvider>
Expand All @@ -32,7 +35,6 @@ public class GeneOntologyAnnotationService extends BaseEntityCrudService<GeneOnt
private Map<Long, GeneOntologyAnnotationDTO> gafMap = new HashMap<>();



@Inject
@AuthenticatedUser
protected Person authenticatedPerson;
Expand Down Expand Up @@ -92,7 +94,8 @@ private void addNewRecordToMap(GeneOntologyAnnotation gafNew, GeneOntologyAnnota

public Long getGeneID(GeneOntologyAnnotationDTO uiEntity, String orgAbbreviation) {
if (accessionGeneMap.isEmpty()) {
accessionGeneMap = geneDAO.getAllGeneIdsPerSpecies(getSpecies(orgAbbreviation));
List<Species> speciesList = getSpecies(orgAbbreviation);
speciesList.forEach(species -> accessionGeneMap.putAll(geneDAO.getAllGeneIdsPerSpecies(species)));
}
Long geneID = accessionGeneMap.get(uiEntity.getGeneIdentifier());
return geneID;
Expand All @@ -106,16 +109,24 @@ private Long getGOID(GeneOntologyAnnotationDTO uiEntity) {
return goID;
}

private Species getSpecies(String orgAbbreviation) {
if (species != null) {
private List<Species> getSpecies(String orgAbbreviation) {
if (CollectionUtils.isNotEmpty(species)) {
return species;
}
Map<String, Object> map = new HashMap<>();
map.put("displayName", orgAbbreviation);
species = speciesDAO.findByParams(map).getSingleResult();
if (orgAbbreviation.equalsIgnoreCase("XB")) {
species = new ArrayList<>();
species.add(getSingleSpecies("XBXL"));
cmpich marked this conversation as resolved.
Show resolved Hide resolved
species.add(getSingleSpecies("XBXT"));
} else {
species = List.of(getSingleSpecies(orgAbbreviation));
}
return species;
}

/**
 * Looks up one species whose {@code displayName} field equals the given value.
 *
 * @param orgAbbreviation value matched against the species {@code displayName} field
 * @return whatever {@code getSingleResult()} yields for that lookup
 *         (NOTE(review): behaviour when no species matches is not visible here; confirm whether this can be null)
 */
private Species getSingleSpecies(String orgAbbreviation) {
	var lookup = speciesDAO.findByField("displayName", orgAbbreviation);
	return lookup.getSingleResult();
}

/**
 * Validates a data provider by delegating to {@code dataProviderValidator}.
 *
 * @param uiEntity the data provider to validate
 * @return the response produced by {@code validateDataProvider(uiEntity, null, true)}
 */
public ObjectResponse<DataProvider> validate(DataProvider uiEntity) {
	// The second and third arguments are fixed to null and true for every call made through this service.
	ObjectResponse<DataProvider> validationResponse = dataProviderValidator.validateDataProvider(uiEntity, null, true);
	return validationResponse;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
package org.alliancegenome.curation_api.services;

import java.util.Date;
import java.util.HashMap;

import io.quarkus.logging.Log;
import jakarta.annotation.PostConstruct;
import jakarta.enterprise.context.RequestScoped;
import jakarta.inject.Inject;
import org.alliancegenome.curation_api.dao.OrganizationDAO;
import org.alliancegenome.curation_api.model.entities.Organization;
import org.alliancegenome.curation_api.response.ObjectResponse;
import org.alliancegenome.curation_api.response.SearchResponse;
import org.alliancegenome.curation_api.services.base.BaseEntityCrudService;

import io.quarkus.logging.Log;
import jakarta.annotation.PostConstruct;
import jakarta.enterprise.context.RequestScoped;
import jakarta.inject.Inject;
import java.util.Date;
import java.util.HashMap;

@RequestScoped
public class OrganizationService extends BaseEntityCrudService<Organization, OrganizationDAO> {

@Inject OrganizationDAO organizationDAO;
@Inject
OrganizationDAO organizationDAO;

Date orgRequest;
HashMap<Long, Organization> orgIdCacheMap = new HashMap<>();
Expand Down Expand Up @@ -54,7 +54,7 @@ public ObjectResponse<Organization> getById(Long id) {
public ObjectResponse<Organization> getByAbbr(String abbr) {

Organization org = null;
SearchResponse<Organization> orgResponse = null;
SearchResponse<Organization> orgResponse;

if (orgRequest != null) {
if (orgAbbrCacheMap.containsKey(abbr)) {
Expand Down
90 changes: 90 additions & 0 deletions src/main/resources/db/migration/v0.38.0.10__gaf-load.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
-- Migration: replace every existing GAF bulk load with a set of FMS-based GAF loads,
-- one per data subtype (FB, HUMAN, MGI, RGD, SGD, WB, XB, ZFIN).
--
-- Step 1: delete the old GAF loads.
-- Rows whose id matches a GAF bulkload row are removed from bulkurlload, bulkfmsload and
-- bulkscheduledload first, then the file history, and only then the bulkload rows themselves.
-- NOTE(review): presumably this order is required by foreign keys pointing at bulkload - confirm against the schema.
delete
from bulkurlload
where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF');

delete from bulkfmsload
where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF');

delete
from bulkscheduledload
where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF');

-- History rows reference the load through bulkload_id rather than sharing its id.
delete from bulkloadfilehistory
where bulkload_id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF');

delete from bulkload
where id in (select id FROM bulkload WHERE backendbulkloadtype = 'GAF');

-- Drop any existing group with this name before it is re-created below.
delete from bulkloadgroup where name = 'File Management System (FMS) GAF Loads';
-- Step 2: create the bulk load group and one bulk load per data subtype for the GAF load.
INSERT INTO bulkloadgroup (id, name)
VALUES (nextval('bulkloadgroup_seq'), 'File Management System (FMS) GAF Loads');
-- Each bulkload row is created in status STOPPED and attached to the group by looking the group up by name.
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'FB GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'HUMAN GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'MGI GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'RGD GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'SGD GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'WB GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'XB GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
INSERT INTO bulkload (id, backendbulkloadtype, name, bulkloadstatus, group_id)
SELECT nextval('bulkload_seq'), 'GAF', 'ZFIN GAF Load', 'STOPPED', id
FROM bulkloadgroup
WHERE name = 'File Management System (FMS) GAF Loads';
-- Step 3: give every GAF bulk load a schedule row, created inactive (scheduleactive = false).
-- NOTE(review): the cron string looks like Quartz syntax for 22:00 Sunday through Thursday - confirm with the scheduler in use.
INSERT INTO bulkscheduledload (id, cronschedule, scheduleactive)
SELECT id, '0 0 22 ? * SUN-THU', false
FROM bulkload
WHERE backendbulkloadtype = 'GAF';
-- Step 4: register each load as an FMS load with data type 'GAF' and its data subtype.
-- The bulkload row is found by the name assigned in step 2.
-- NOTE(review): this assumes bulkload names are unique - confirm.
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'FB'
FROM bulkload
WHERE name = 'FB GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'HUMAN'
FROM bulkload
WHERE name = 'HUMAN GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'MGI'
FROM bulkload
WHERE name = 'MGI GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'RGD'
FROM bulkload
WHERE name = 'RGD GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'SGD'
FROM bulkload
WHERE name = 'SGD GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'WB'
FROM bulkload
WHERE name = 'WB GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'XB'
FROM bulkload
WHERE name = 'XB GAF Load';
INSERT INTO bulkfmsload (id, fmsdatatype, fmsdatasubtype)
SELECT id, 'GAF', 'ZFIN'
FROM bulkload
WHERE name = 'ZFIN GAF Load';
Loading